From a58da1a2ff039dd3bb4c43db3919995cf4a74cc7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 27 Dec 2019 09:44:43 -0500 Subject: [PATCH] [OPENMP50]Codegen for lastprivate conditional list items. Added codegen support for lastprivate conditional. According to the standard, if when the conditional modifier appears on the clause, if an assignment to a list item is encountered in the construct then the original list item is assigned the value that is assigned to the new list item in the sequentially last iteration or lexically last section in which such an assignment is encountered. We look for the assignment operations and check if the left side references lastprivate conditional variable. Then the next code is emitted: if (last_iv_a <= iv) { last_iv_a = iv; last_a = lp_a; } At the end the implicit barrier is generated to wait for the end of all threads and then in the check for the last iteration the private copy is assigned the last value. if (last_iter) { lp_a = last_a; // <--- new code a = lp_a; // <--- store of private value to the original variable. } --- clang/lib/CodeGen/CGExpr.cpp | 3 + clang/lib/CodeGen/CGExprComplex.cpp | 7 +- clang/lib/CodeGen/CGExprScalar.cpp | 4 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 263 ++++++++++++++++++++++++++ clang/lib/CodeGen/CGOpenMPRuntime.h | 57 ++++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 26 ++- clang/test/OpenMP/for_lastprivate_codegen.cpp | 49 ++++- clang/test/OpenMP/simd_codegen.cpp | 52 +++-- 8 files changed, 443 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index c729037..e43ed50 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4717,6 +4717,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); EmitStoreThroughLValue(RV, LV); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); return LV; } diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 6b11969..f7a4e9e 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/StmtVisitor.h" @@ -1136,7 +1137,11 @@ ComplexPairTy CodeGenFunction::EmitLoadOfComplex(LValue src, LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) { assert(E->getOpcode() == BO_Assign); ComplexPairTy Val; // ignored - return ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + LValue LVal = ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); + return LVal; } typedef ComplexPairTy (ComplexExprEmitter::*CompoundFunc)( diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 12bf37e..d759d36 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -14,6 +14,7 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -2997,6 +2998,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); + if (CGF.getLangOpts().OpenMP) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, + E->getLHS()); return LHSLV; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 59f352d..735cacf 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" @@ -11401,6 +11402,268 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); } +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) + : CGM(CGF.CGM), + NeedToPush(llvm::any_of(S.getClausesOfKind(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const auto *C : S.getClausesOfKind()) { + if (C->getKind() != OMPC_LASTPRIVATE_conditional) + continue; + + for (const Expr *Ref : C->varlists()) { + Data.DeclToUniqeName.try_emplace( + cast(Ref->IgnoreParenImpCasts())->getDecl(), + generateUniqueName(CGM, "pl_cond", Ref)); + } + } + Data.IVLVal = IVLVal; + // In simd only mode or for simd directives no need to generate threadprivate + // references for the loop iteration counter, we can use the original one + // since outlining cannot happen in simd regions. + if (CGF.getLangOpts().OpenMPSimd || + isOpenMPSimdDirective(S.getDirectiveKind())) { + Data.UseOriginalIV = true; + return; + } + llvm::SmallString<16> Buffer; + llvm::raw_svector_ostream OS(Buffer); + PresumedLoc PLoc = + CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + CGM.getDiags().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" + << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; + Data.IVName = OS.str(); + + // Global loop counter. Required to handle inner parallel-for regions. + // global_iv = &iv; + QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType()); + Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, PtrIVTy, Data.IVName); + LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, PtrIVTy); + CGF.EmitStoreOfScalar(IVLVal.getPointer(CGF), GlobIVLVal); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); +} + +namespace { +/// Checks if the lastprivate conditional variable is referenced in LHS. +class LastprivateConditionalRefChecker final + : public ConstStmtVisitor { + CodeGenFunction &CGF; + ArrayRef LPM; + const Expr *FoundE = nullptr; + const Decl *FoundD = nullptr; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + SourceLocation Loc; + bool UseOriginalIV = false; + +public: + bool VisitDeclRefExpr(const DeclRefExpr *E) { + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitMemberExpr(const MemberExpr *E) { + if (!CGF.IsWrappedCXXThis(E->getBase())) + return false; + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getMemberDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getMemberDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (!Child) + continue; + if (const auto *E = dyn_cast(Child)) + if (!E->isGLValue()) + continue; + if (Visit(Child)) + return true; + } + return false; + } + explicit LastprivateConditionalRefChecker( + CodeGenFunction &CGF, + ArrayRef LPM) + : CGF(CGF), LPM(LPM) {} + std::tuple + getFoundData() const { + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, + UseOriginalIV); + } +}; +} // namespace + +void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS) { + if (CGF.getLangOpts().OpenMP < 50) + return; + LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); + if (!Checker.Visit(LHS)) + return; + const Expr *FoundE; + const Decl *FoundD; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + bool UseOriginalIV; + std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = + Checker.getFoundData(); + + // Last updated loop counter for the lastprivate conditional var. + // int last_iv = 0; + llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); + llvm::Constant *LastIV = + getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv"); + cast(LastIV)->setAlignment( + IVLVal.getAlignment().getAsAlign()); + LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); + + // Private address of the lastprivate conditional in the current context. + // priv_a + LValue LVal = CGF.EmitLValue(FoundE); + // Last value of the lastprivate conditional. + // decltype(priv_a) last_a; + llvm::Constant *Last = getOrCreateInternalVariable( + LVal.getAddress(CGF).getElementType(), UniqueDeclName); + cast(Last)->setAlignment( + LVal.getAlignment().getAsAlign()); + LValue LastLVal = + CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); + + // Global loop counter. Required to handle inner parallel-for regions. + // global_iv + if (!UseOriginalIV) { + QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType()); + Address IVAddr = getAddrOfArtificialThreadPrivate(CGF, PtrIVTy, IVName); + IVLVal = + CGF.EmitLoadOfPointerLValue(IVAddr, PtrIVTy->castAs()); + } + llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); + + // #pragma omp critical(a) + // if (last_iv <= iv) { + // last_iv = iv; + // last_a = priv_a; + // } + auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, + FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *LastIVVal = + CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc()); + // (last_iv <= global_iv) ? Check if the variable is updated and store new + // value in global var. + llvm::Value *CmpRes; + if (IVLVal.getType()->isSignedIntegerType()) { + CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); + } else { + assert(IVLVal.getType()->isUnsignedIntegerType() && + "Loop iteration variable must be integer."); + CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); + } + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); + CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); + // { + CGF.EmitBlock(ThenBB); + + // last_iv = global_iv; + CGF.EmitStoreOfScalar(IVVal, LastIVLVal); + + // last_a = priv_a; + switch (CGF.getEvaluationKind(LVal.getType())) { + case TEK_Scalar: { + llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfScalar(PrivVal, LastLVal); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy PrivVal = + CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); + break; + } + case TEK_Aggregate: + llvm_unreachable( + "Aggregates are not supported in lastprivate conditional."); + } + // } + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + + if (CGM.getLangOpts().OpenMPSimd) { + // Do not emit as a critical region as no parallel region could be emitted. + RegionCodeGenTy ThenRCG(CodeGen); + ThenRCG(CGF); + } else { + emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc()); + } +} + +void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( + CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, + SourceLocation Loc) { + if (CGF.getLangOpts().OpenMP < 50) + return; + auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD); + assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() && + "Unknown lastprivate conditional variable."); + StringRef UniqueName = It->getSecond(); + llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); + // The variable was not updated in the region - exit. + if (!GV) + return; + LValue LPLVal = CGF.MakeAddrLValue( + GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); + llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); + CGF.EmitStoreOfScalar(Res, PrivLVal); +} + llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index bcaa06a..2a6a6b9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -222,6 +222,33 @@ public: ~NontemporalDeclsRAII(); }; + /// Maps the expression for the lastprivate variable to the global copy used + /// to store new value because original variables are not mapped in inner + /// parallel regions. Only private copies are captured but we need also to + /// store private copy in shared address. + /// Also, stores the expression for the private loop counter and it + /// threaprivate name. + struct LastprivateConditionalData { + llvm::SmallDenseMap, SmallString<16>> + DeclToUniqeName; + LValue IVLVal; + SmallString<16> IVName; + /// True if original lvalue for loop counter can be used in codegen (simd + /// region or simd only mode) and no need to create threadprivate + /// references. + bool UseOriginalIV = false; + }; + /// Manages list of lastprivate conditional decls for the specified directive. + class LastprivateConditionalRAII { + CodeGenModule &CGM; + const bool NeedToPush; + + public: + LastprivateConditionalRAII(CodeGenFunction &CGF, + const OMPExecutableDirective &S, LValue IVLVal); + ~LastprivateConditionalRAII(); + }; + protected: CodeGenModule &CGM; StringRef FirstSeparator, Separator; @@ -666,6 +693,11 @@ private: /// The set is the union of all current stack elements. llvm::SmallVector NontemporalDeclsStack; + /// Stack for list of addresses of declarations in current context marked as + /// lastprivate conditional. The set is the union of all current stack + /// elements. + llvm::SmallVector LastprivateConditionalStack; + /// Flag for keeping track of weather a requires unified_shared_memory /// directive is present. bool HasRequiresUnifiedSharedMemory = false; @@ -1683,6 +1715,31 @@ public: /// Checks if the \p VD variable is marked as nontemporal declaration in /// current context. bool isNontemporalDecl(const ValueDecl *VD) const; + + /// Checks if the provided \p LVal is lastprivate conditional and emits the + /// code to update the value of the original variable. + /// \code + /// lastprivate(conditional: a) + /// ... + /// a; + /// lp_a = ...; + /// #pragma omp critical(a) + /// if (last_iv_a <= iv) { + /// last_iv_a = iv; + /// global_a = lp_a; + /// } + /// \endcode + virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS); + + /// Gets the address of the global copy used for lastprivate conditional + /// update, if any. + /// \param PrivLVal LValue for the private copy. + /// \param VD Original lastprivate declaration. + virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, + LValue PrivLVal, + const VarDecl *VD, + SourceLocation Loc); }; /// Class supports emissionof SIMD-only code. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index a38a79b..cac0e7d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1045,6 +1045,18 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( llvm::BasicBlock *ThenBB = nullptr; llvm::BasicBlock *DoneBB = nullptr; if (IsLastIterCond) { + // Emit implicit barrier if at least one lastprivate conditional is found + // and this is not a simd mode. + if (!getLangOpts().OpenMPSimd && + llvm::any_of(D.getClausesOfKind(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), + OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } ThenBB = createBasicBlock(".omp.lastprivate.then"); DoneBB = createBasicBlock(".omp.lastprivate.done"); Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); @@ -1083,14 +1095,19 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( cast(cast(*ISrcRef)->getDecl()); const auto *DestVD = cast(cast(*IDestRef)->getDecl()); - // Get the address of the original variable. - Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); if (const auto *RefTy = PrivateVD->getType()->getAs()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), getNaturalTypeAlignment(RefTy->getPointeeType())); + // Store the last value to the private copy in the last iteration. + if (C->getKind() == OMPC_LASTPRIVATE_conditional) + CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( + *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, + (*IRef)->getExprLoc()); + // Get the address of the original variable. + Address OriginalAddr = GetAddrOfLocalVar(DestVD); EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); } ++IRef; @@ -1974,6 +1991,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPLinearClause(S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + CGF, S, CGF.EmitLValue(S.getIterationVariable())); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) @@ -2546,6 +2565,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + *this, S, EmitLValue(S.getIterationVariable())); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); @@ -2856,6 +2877,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp index b7c82c2..ecaf81a 100644 --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -3,17 +3,33 @@ // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=OMP50 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER #define HEADER +#ifdef OMP5 +#define CONDITIONAL conditional : +#else +#define CONDITIONAL +#endif //OMP5 + enum omp_allocator_handle_t { omp_null_allocator = 0, omp_default_mem_alloc = 1, @@ -160,6 +176,10 @@ char cnt; // CHECK-DAG: [[X:@.+]] = global double 0.0 // CHECK-DAG: [[F:@.+]] = global float 0.0 // CHECK-DAG: [[CNT:@.+]] = global i8 0 +// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32* null +// OMP50-DAG: [[LAST_IV_F:@.+]] = {{.*}}common global i32 0 +// OMP50-DAG: [[LAST_F:@.+]] = {{.*}}common global float 0.000000e+00, + template T tmain() { S test; @@ -477,9 +497,10 @@ int main() { A::x++; } #pragma omp parallel -#pragma omp for allocate(omp_const_mem_alloc :cnt) lastprivate(cnt) +#pragma omp for allocate(omp_const_mem_alloc :cnt) lastprivate(cnt) lastprivate(CONDITIONAL f) for (cnt = 0; cnt < 2; ++cnt) { A::x++; + f = 0; } return tmain(); #endif @@ -642,6 +663,9 @@ int main() { // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]] // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] +// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32** [[IV_REF]] to i8*), i64 8, i8*** @{{.+}}) +// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32** +// OMP50: store i32* %{{.+}}, i32** [[BC]], // CHECK: [[CNT_PRIV:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 1, i8* inttoptr (i64 3 to i8*)) // CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) // UB = min(UB, GlobalUB) @@ -653,11 +677,30 @@ int main() { // CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]] // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]] // +// CHECK: store float 0.000000e+00, float* [[F_PRIV:%.+]], +// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32** [[IV_REF]] to i8*), i64 8, i8*** @{{.+}}) +// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32** +// OMP50: [[IV_ADDR:%.+]] = load i32*, i32** [[BC]], +// OMP50: [[IV:%.+]] = load i32, i32* [[IV_ADDR]], +// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION:@.+]]) +// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_F]], +// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]] +// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]] + +// OMP50: [[LP_THEN]]: +// OMP50: store i32 [[IV]], i32* [[LAST_IV_F]], +// OMP50: [[F_VAL:%.+]] = load float, float* [[F_PRIV]], +// OMP50: store float [[F_VAL]], float* [[LAST_F]], +// OMP50: br label %[[LP_DONE]] + +// OMP50: [[LP_DONE]]: +// OMP50: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION]]) // CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) // Check for final copying of private values back to original vars. // CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]], // CHECK: [[IS_LAST_ITER:%.+]] = icmp ne i32 [[IS_LAST_VAL]], 0 +// OMP50-NEXT: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]]) // CHECK: br i1 [[IS_LAST_ITER:%.+]], label %[[LAST_THEN:.+]], label %[[LAST_DONE:.+]] // CHECK: [[LAST_THEN]] @@ -666,6 +709,10 @@ int main() { // original cnt=private_cnt; // CHECK: [[CNT_VAL:%.+]] = load i8, i8* [[CNT_PRIV]], // CHECK: store i8 [[CNT_VAL]], i8* [[CNT]], +// OMP50: [[F_VAL:%.+]] = load float, float* [[LAST_F]], +// OMP50: store float [[F_VAL]], float* [[F_PRIV]], +// CHECK: [[F_VAL:%.+]] = load float, float* [[F_PRIV]], +// CHECK: store float [[F_VAL]], float* [[F]], // CHECK-NEXT: br label %[[LAST_DONE]] // CHECK: [[LAST_DONE]] diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index aac76ad..cb53bb1a 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -1,15 +1,15 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=45 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=45 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - -fopenmp-version=45 | FileCheck %s --check-prefix=TERM_DEBUG +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 --check-prefix=OMP50RT // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=50 -DOMP5 -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 --check-prefix=OMP50RT -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix=TERM_DEBUG %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=45 | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=45 +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - -fopenmp-version=45 | FileCheck --check-prefix=TERM_DEBUG %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=50 -DOMP5 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 @@ -17,7 +17,14 @@ #ifndef HEADER #define HEADER +#ifdef OMP5 +#define CONDITIONAL conditional : +#else +#define CONDITIONAL +#endif //OMP5 // CHECK: [[SS_TY:%.+]] = type { i32 } +// OMP50-DAG: [[LAST_IV:@.+]] = {{.*}}common global i64 0 +// OMP50-DAG: [[LAST_A:@.+]] = {{.*}}common global i32 0 long long get_val() { return 0; } double *g_ptr; @@ -214,7 +221,7 @@ void simple(float *a, float *b, float *c, float *d) { int A; // CHECK: store i32 -1, i32* [[A:%.+]], A = -1; - #pragma omp simd lastprivate(A) + #pragma omp simd lastprivate(CONDITIONAL A) // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]] // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]]: @@ -229,15 +236,32 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] // CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.access.group // CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.access.group -// CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32 -// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.access.group - A = i; +// CHECK-NEXT: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.access.group +// CHECK-NEXT: [[CAST:%.+]] = sext i32 [[A_VAL]] to i64 +// CHECK-NEXT: [[ADD:%.+]] = add nsw i64 [[CAST]], [[LC_VAL]] +// CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[ADD]] to i32 +// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV]],{{.+}}!llvm.access.group +// OMP50-NEXT: [[IV:%.+]] = load i64, i64* [[OMP_IV7]],{{.+}}!llvm.access.group +// OMP50RT: call void @__kmpc_critical(%struct.ident_t* {{.+}}, i32 [[GTID:%.+]], [8 x i32]* [[A_REGION:@.+]]),{{.+}}!llvm.access.group +// OMP50-NEXT: [[LAST_IV_VAL:%.+]] = load i64, i64* [[LAST_IV]],{{.+}}!llvm.access.group +// OMP50-NEXT: [[CMP:%.+]] = icmp sle i64 [[LAST_IV_VAL]], [[IV]] +// OMP50-NEXT: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]] +// OMP50: [[LP_THEN]]: +// OMP50-NEXT: store i64 [[IV]], i64* [[LAST_IV]],{{.+}}!llvm.access.group +// OMP50-NEXT: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV]],{{.+}}!llvm.access.group +// OMP50-NEXT: store i32 [[A_VAL]], i32* [[LAST_A]],{{.+}}!llvm.access.group +// OMP50-NEXT: br label %[[LP_DONE]] +// OMP50: [[LP_DONE]]: +// OMP50RT-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.+}}, i32 [[GTID]], [8 x i32]* [[A_REGION]]),{{.+}}!llvm.access.group + A += i; // CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1 // CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.access.group } // CHECK: [[SIMPLE_LOOP7_END]]: // CHECK-NEXT: store i64 11, i64* +// OMP50-NEXT: [[LAST_A_VAL:%.+]] = load i32, i32* [[LAST_A]], +// OMP50-NEXT: store i32 [[LAST_A_VAL]], i32* [[A_PRIV]], // CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]], // CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]], int R; -- 2.7.4