From e46f0fee3066240389e20dc847a281274dc81d2e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Mar 2020 14:37:51 -0500 Subject: [PATCH] [OPENMP50]Codegen for 'depend' clause in depobj directive. Added codegen for 'depend' clause in depobj directive. The depend clause is emitted as an array kmp_depend_info deps[N + 1], where N is the number of items in the clause. The first element in this array is reserved for storing the number of items: deps[0].base_addr = N; This extra element is required to implement 'update' and 'destroy' clauses. It is required to know the size of the array to destroy it correctly and to update the dependency kind. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 113 ++++++++++++++++++++++++---------- clang/lib/CodeGen/CGOpenMPRuntime.h | 10 +++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 14 ++++- clang/test/OpenMP/depobj_codegen.cpp | 83 +++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 33 deletions(-) create mode 100644 clang/test/OpenMP/depobj_codegen.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9fe0306..ce23434e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -5185,29 +5186,21 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, return Result; } -void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPExecutableDirective &D, - llvm::Function *TaskFunction, - QualType SharedsTy, Address Shareds, - const Expr *IfCond, - const OMPTaskDataTy &Data) { - if (!CGF.HaveInsertPoint()) - return; - - TaskResultTy Result = - emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); - llvm::Value *NewTask = Result.NewTask; - llvm::Function *TaskEntry = Result.TaskEntry; - 
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; - LValue TDBase = Result.TDBase; - const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; +Address CGOpenMPRuntime::emitDependClause( + CodeGenFunction &CGF, + ArrayRef> Dependencies, + bool ForDepobj, SourceLocation Loc) { + // Process list of dependencies. ASTContext &C = CGM.getContext(); - // Process list of dependences. Address DependenciesArray = Address::invalid(); - unsigned NumDependencies = Data.Dependences.size(); + unsigned NumDependencies = Dependencies.size(); if (NumDependencies) { // Dependence kind for RTL. - enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; + enum RTLDependenceKindTy { + DepIn = 0x01, + DepInOut = 0x3, + DepMutexInOutSet = 0x4 + }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; QualType FlagsTy = @@ -5224,15 +5217,47 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, } else { KmpDependInfoRD = cast(KmpDependInfoTy->getAsTagDecl()); } - // Define type kmp_depend_info[]; + // Define type kmp_depend_info[]; + // For depobj reserve one extra element to store the number of elements. + // It is required to handle depobj(x) update(in) construct. QualType KmpDependInfoArrayTy = C.getConstantArrayType( - KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), + KmpDependInfoTy, + llvm::APInt(/*numBits=*/64, NumDependencies + (ForDepobj ? 1 : 0)), nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - // kmp_depend_info[] deps; - DependenciesArray = - CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); + // kmp_depend_info[] deps; + if (ForDepobj) { + // Need to allocate on the dynamic memory. + llvm::Value *ThreadID = getThreadID(CGF, Loc); + // Use default allocator. 
+ llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy); + CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); + llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align)); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr"); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo()); + DependenciesArray = Address(Addr, Align); + } else { + DependenciesArray = + CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); + } + if (ForDepobj) { + // Write number of elements in the first element of array for depobj. + llvm::Value *NumVal = + llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); + LValue Base = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), + KmpDependInfoTy); + // deps[0].base_addr = NumDependencies; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal); + } for (unsigned I = 0; I < NumDependencies; ++I) { - const Expr *E = Data.Dependences[I].second; + const Expr *E = Dependencies[I].second; LValue Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); @@ -5249,22 +5274,23 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, } else { Size = CGF.getTypeSize(Ty); } - LValue Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), - KmpDependInfoTy); - // deps[i].base_addr = &; + LValue Base = + CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( + DependenciesArray, I + (ForDepobj ? 
1 : 0)), + KmpDependInfoTy); + // deps[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), BaseAddrLVal); - // deps[i].len = sizeof(); + // deps[i].len = sizeof(); LValue LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); CGF.EmitStoreOfScalar(Size, LenLVal); - // deps[i].flags = ; + // deps[i].flags = ; RTLDependenceKindTy DepKind; - switch (Data.Dependences[I].first) { + switch (Dependencies[I].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; @@ -5289,6 +5315,29 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); } + return DependenciesArray; +} + +void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Function *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint()) + return; + + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); + llvm::Value *NewTask = Result.NewTask; + llvm::Function *TaskEntry = Result.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; + LValue TDBase = Result.TDBase; + const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + // Process list of dependences. + Address DependenciesArray = + emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc); + unsigned NumDependencies = Data.Dependences.size(); // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index f559e0d..54223d3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1776,6 +1776,16 @@ public: LValue PrivLVal, const VarDecl *VD, SourceLocation Loc); + + /// Emits list of dependencies based on the provided data (array of + /// dependence/expression pairs). + /// \param ForDepobj true if the memory for dependencies is allocated for depobj + /// directive. In this case, the array is allocated dynamically. + /// \returns Pointer to the first element of the array cast to VoidPtr type. + Address emitDependClause( + CodeGenFunction &CGF, + ArrayRef> Dependencies, + bool ForDepobj, SourceLocation Loc); }; /// Class supports emissionof SIMD-only code. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index bab7c6d..822542d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3800,7 +3800,19 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { S.getBeginLoc(), AO); } -void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {} +void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { + const auto *DO = S.getSingleClause(); + LValue DOLVal = EmitLValue(DO->getDepobj()); + if (const auto *DC = S.getSingleClause()) { + SmallVector, 4> + Dependencies; + for (const Expr *IRef : DC->varlists()) + Dependencies.emplace_back(DC->getDependencyKind(), IRef); + Address DepAddr = CGM.getOpenMPRuntime().emitDependClause( + *this, Dependencies, /*ForDepobj=*/true, DC->getBeginLoc()); + EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); + } +} void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, diff --git a/clang/test/OpenMP/depobj_codegen.cpp b/clang/test/OpenMP/depobj_codegen.cpp new file mode 100644 index 0000000..7a8264a --- /dev/null +++ 
b/clang/test/OpenMP/depobj_codegen.cpp @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -fopenmp-version=50 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -fopenmp-version=50 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[MAIN_A:@.+]] = internal global i8* null, +// CHECK-DAG: [[TMAIN_A:@.+]] = linkonce_odr global i8* null, + +typedef void *omp_depend_t; + +void foo() {} + +template +T tmain(T argc) { + static T a; +#pragma omp depobj(a) depend(in:argc) +#pragma omp depobj(argc) destroy +#pragma omp depobj(argc) update(inout) + return argc; +} + +int main(int argc, char **argv) { + static omp_depend_t a; + omp_depend_t b; +#pragma omp depobj(a) depend(out:argc, argv) +#pragma omp depobj(b) destroy +#pragma omp depobj(b) update(mutexinoutset) + (void)tmain(a), tmain(b); + return 0; +} + +// CHECK-LABEL: @main +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: [[DEP_ADDR_VOID:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 72, i8* null) +// CHECK: [[DEP_ADDR:%.+]] = bitcast i8* [[DEP_ADDR_VOID]] to [3 x %struct.kmp_depend_info]* +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, 
i{{.+}} 0 +// CHECK: [[SZ_BASE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: store i64 2, i64* [[SZ_BASE]], +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: store i64 %{{.+}}, i64* [[ADDR]], +// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1 +// CHECK: store i64 4, i64* [[SZ_ADDR]], +// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2 +// CHECK: store i8 3, i8* [[FLAGS_ADDR]], +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: store i64 %{{.+}}, i64* [[ADDR]], +// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1 +// CHECK: store i64 8, i64* [[SZ_ADDR]], +// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2 +// CHECK: store i8 3, i8* [[FLAGS_ADDR]], +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[BASE_ADDR]] to i8* +// CHECK: store i8* [[DEP]], i8** [[MAIN_A]], + +// CHECK-LABEL: tmain +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: [[DEP_ADDR_VOID:%.+]] = call i8* 
@__kmpc_alloc(i32 [[GTID]], i64 48, i8* null) +// CHECK: [[DEP_ADDR:%.+]] = bitcast i8* [[DEP_ADDR_VOID]] to [2 x %struct.kmp_depend_info]* +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[SZ_BASE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: store i64 1, i64* [[SZ_BASE]], +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: store i64 %{{.+}}, i64* [[ADDR]], +// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1 +// CHECK: store i64 8, i64* [[SZ_ADDR]], +// CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2 +// CHECK: store i8 1, i8* [[FLAGS_ADDR]], +// CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[BASE_ADDR]] to i8* +// CHECK: store i8* [[DEP]], i8** [[TMAIN_A]], + +#endif -- 2.7.4