Added codegen for task-based directive with in_reduction clause.
```
<body>
```
The next code is emitted:
```
void *td;
...
td = call i8* @__kmpc_task_reduction_init();
...
<type> *priv = (<type> *)call i8* @__kmpc_task_reduction_get_th_data(i32
GTID, i8* td, i8* <orig>)
```
llvm-svn: 309270
return llvm::makeArrayRef(getRHSExprs().end(), varlist_size());
}
+ /// Set list of helper reduction taskgroup descriptors.
+ void setTaskgroupDescriptors(ArrayRef<Expr *> ReductionOps);
+
+ /// Get the list of helper reduction taskgroup descriptors.
+ MutableArrayRef<Expr *> getTaskgroupDescriptors() {
+ return MutableArrayRef<Expr *>(getReductionOps().end(), varlist_size());
+ }
+ ArrayRef<const Expr *> getTaskgroupDescriptors() const {
+ return llvm::makeArrayRef(getReductionOps().end(), varlist_size());
+ }
+
public:
/// Creates clause with a list of variables \a VL.
///
/// \endcode
/// Required for proper codegen of final reduction operation performed by the
/// reduction clause.
+ /// \param TaskgroupDescriptors List of helper taskgroup descriptors for
+ /// corresponding items in parent taskgroup task_reduction clause.
/// \param PreInit Statement that must be executed before entering the OpenMP
/// region with this clause.
/// \param PostUpdate Expression that must be executed after exit from the
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, ArrayRef<Expr *> Privates,
ArrayRef<Expr *> LHSExprs, ArrayRef<Expr *> RHSExprs,
- ArrayRef<Expr *> ReductionOps, Stmt *PreInit, Expr *PostUpdate);
+ ArrayRef<Expr *> ReductionOps, ArrayRef<Expr *> TaskgroupDescriptors,
+ Stmt *PreInit, Expr *PostUpdate);
/// Creates an empty clause with the place for \a N variables.
///
return helper_expr_range(getReductionOps().begin(),
getReductionOps().end());
}
+ helper_expr_const_range taskgroup_descriptors() const {
+ return helper_expr_const_range(getTaskgroupDescriptors().begin(),
+ getTaskgroupDescriptors().end());
+ }
+ helper_expr_range taskgroup_descriptors() {
+ return helper_expr_range(getTaskgroupDescriptors().begin(),
+ getTaskgroupDescriptors().end());
+ }
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
for (auto *E : C->reduction_ops()) {
TRY_TO(TraverseStmt(E));
}
+ for (auto *E : C->taskgroup_descriptors())
+ TRY_TO(TraverseStmt(E));
return true;
}
std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end());
}
+void OMPInReductionClause::setTaskgroupDescriptors(
+ ArrayRef<Expr *> TaskgroupDescriptors) {
+ assert(TaskgroupDescriptors.size() == varlist_size() &&
+ "Number of in reduction descriptors is not the same as the "
+ "preallocated buffer");
+ std::copy(TaskgroupDescriptors.begin(), TaskgroupDescriptors.end(),
+ getReductionOps().end());
+}
+
OMPInReductionClause *OMPInReductionClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL,
NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
ArrayRef<Expr *> Privates, ArrayRef<Expr *> LHSExprs,
- ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps, Stmt *PreInit,
- Expr *PostUpdate) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
+ ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps,
+ ArrayRef<Expr *> TaskgroupDescriptors, Stmt *PreInit, Expr *PostUpdate) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(6 * VL.size()));
OMPInReductionClause *Clause = new (Mem) OMPInReductionClause(
StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo);
Clause->setVarRefs(VL);
Clause->setLHSExprs(LHSExprs);
Clause->setRHSExprs(RHSExprs);
Clause->setReductionOps(ReductionOps);
+ Clause->setTaskgroupDescriptors(TaskgroupDescriptors);
Clause->setPreInitStmt(PreInit);
Clause->setPostUpdateExpr(PostUpdate);
return Clause;
OMPInReductionClause *OMPInReductionClause::CreateEmpty(const ASTContext &C,
unsigned N) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * N));
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(6 * N));
return new (Mem) OMPInReductionClause(N);
}
if (E)
Profiler->VisitStmt(E);
}
+ for (auto *E : C->taskgroup_descriptors()) {
+ if (E)
+ Profiler->VisitStmt(E);
+ }
}
void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) {
VisitOMPClauseList(C);
RedCG, Cnt);
}
}
+ // Privatize all private variables except for in_reduction items.
(void)Scope.Privatize();
+ SmallVector<const Expr *, 4> InRedVars;
+ SmallVector<const Expr *, 4> InRedPrivs;
+ SmallVector<const Expr *, 4> InRedOps;
+ SmallVector<const Expr *, 4> TaskgroupDescriptors;
+ for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
+ auto IPriv = C->privates().begin();
+ auto IRed = C->reduction_ops().begin();
+ auto ITD = C->taskgroup_descriptors().begin();
+ for (const auto *Ref : C->varlists()) {
+ InRedVars.emplace_back(Ref);
+ InRedPrivs.emplace_back(*IPriv);
+ InRedOps.emplace_back(*IRed);
+ TaskgroupDescriptors.emplace_back(*ITD);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ std::advance(ITD, 1);
+ }
+ }
+ // Privatize in_reduction items here, because taskgroup descriptors must be
+ // privatized earlier.
+ OMPPrivateScope InRedScope(CGF);
+ if (!InRedVars.empty()) {
+ ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
+ for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
+ RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitAggregateType(CGF, Cnt);
+ // The taskgroup descriptor variable is always implicit firstprivate and
+ // privatized already during procoessing of the firstprivates.
+ llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
+ CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
+ Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+ CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+ Replacement = Address(
+ CGF.EmitScalarConversion(
+ Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+ CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
+ SourceLocation()),
+ Replacement.getAlignment());
+ Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+ InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
+ [Replacement]() { return Replacement; });
+ // FIXME: This must removed once the runtime library is fixed.
+ // Emit required threadprivate variables for
+ // initilizer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ RedCG, Cnt);
+ }
+ }
+ (void)InRedScope.Privatize();
Action.Enter(CGF);
BodyGen(CGF);
/// Returns the location and reduction operation from the innermost parent
/// region for the given \p D.
DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
- BinaryOperatorKind &BOK);
+ BinaryOperatorKind &BOK,
+ Expr *&TaskgroupDescriptor);
/// Returns the location and reduction operation from the innermost parent
/// region for the given \p D.
DSAVarData getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
- const Expr *&ReductionRef);
+ const Expr *&ReductionRef,
+ Expr *&TaskgroupDescriptor);
/// Return reduction reference expression for the current taskgroup.
Expr *getTaskgroupReductionRef() const {
assert(Stack.back().first.back().Directive == OMPD_taskgroup &&
"directive.");
return Stack.back().first.back().TaskgroupReductionRef;
}
+ /// Checks if the given \p VD declaration is actually a taskgroup reduction
+ /// descriptor variable at the \p Level of OpenMP regions.
+ bool isTaskgroupReductionRef(ValueDecl *VD, unsigned Level) const {
+ return Stack.back().first[Level].TaskgroupReductionRef &&
+ cast<DeclRefExpr>(Stack.back().first[Level].TaskgroupReductionRef)
+ ->getDecl() == VD;
+ }
/// \brief Returns data sharing attributes from top of the stack for the
/// specified declaration.
DSAStackTy::DSAVarData
DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
- BinaryOperatorKind &BOK) {
+ BinaryOperatorKind &BOK,
+ Expr *&TaskgroupDescriptor) {
D = getCanonicalDecl(D);
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
if (Stack.back().first.empty())
return DSAVarData();
SR = ReductionData.ReductionRange;
BOK = ReductionData.ReductionOp.get<ReductionData::BOKPtrType>();
+ assert(I->TaskgroupReductionRef && "taskgroup reduction reference "
+ "expression for the descriptor is not "
+ "set.");
+ TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc);
}
DSAStackTy::DSAVarData
DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR,
- const Expr *&ReductionRef) {
+ const Expr *&ReductionRef,
+ Expr *&TaskgroupDescriptor) {
D = getCanonicalDecl(D);
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
if (Stack.back().first.empty())
return DSAVarData();
SR = ReductionData.ReductionRange;
ReductionRef = ReductionData.ReductionOp.get<const Expr *>();
+ assert(I->TaskgroupReductionRef && "taskgroup reduction reference "
+ "expression for the descriptor is not "
+ "set.");
+ TaskgroupDescriptor = I->TaskgroupReductionRef;
return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
Data.PrivateCopy, I->DefaultAttrLoc);
}
bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
return DSAStack->hasExplicitDSA(
- D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
+ D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; },
+ Level) ||
+ // Consider taskgroup reduction descriptor variable a private to avoid
+ // possible capture in the region.
+ (DSAStack->hasExplicitDirective(
+ [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
+ Level) &&
+ DSAStack->isTaskgroupReductionRef(D, Level));
}
bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) {
SmallVector<OMPClauseWithPreInit *, 8> PICs;
// This is required for proper codegen.
for (auto *Clause : Clauses) {
+ if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
+ Clause->getClauseKind() == OMPC_in_reduction) {
+ // Capture taskgroup task_reduction descriptors inside the tasking regions
+ // with the corresponding in_reduction items.
+ auto *IRC = cast<OMPInReductionClause>(Clause);
+ for (auto *E : IRC->taskgroup_descriptors())
+ if (E)
+ MarkDeclarationsReferencedInExpr(E);
+ }
if (isOpenMPPrivate(Clause->getClauseKind()) ||
Clause->getClauseKind() == OMPC_copyprivate ||
(getLangOpts().OpenMPUseTLS &&
// Generate list of implicitly defined firstprivate variables.
VarsWithInheritedDSA = DSAChecker.getVarsWithInheritedDSA();
- if (!DSAChecker.getImplicitFirstprivate().empty()) {
+ SmallVector<Expr *, 4> ImplicitFirstprivates(
+ DSAChecker.getImplicitFirstprivate().begin(),
+ DSAChecker.getImplicitFirstprivate().end());
+ // Mark taskgroup task_reduction descriptors as implicitly firstprivate.
+ for (auto *C : Clauses) {
+ if (auto *IRC = dyn_cast<OMPInReductionClause>(C)) {
+ for (auto *E : IRC->taskgroup_descriptors())
+ if (E)
+ ImplicitFirstprivates.emplace_back(E);
+ }
+ }
+ if (!ImplicitFirstprivates.empty()) {
if (OMPClause *Implicit = ActOnOpenMPFirstprivateClause(
- DSAChecker.getImplicitFirstprivate(), SourceLocation(),
- SourceLocation(), SourceLocation())) {
+ ImplicitFirstprivates, SourceLocation(), SourceLocation(),
+ SourceLocation())) {
ClausesWithImplicit.push_back(Implicit);
ErrorFound = cast<OMPFirstprivateClause>(Implicit)->varlist_size() !=
- DSAChecker.getImplicitFirstprivate().size();
+ ImplicitFirstprivates.size();
} else
ErrorFound = true;
}
SmallVector<Expr *, 8> RHSs;
/// Reduction operation expression.
SmallVector<Expr *, 8> ReductionOps;
+ /// Taskgroup descriptors for the corresponding reduction items in
+ /// in_reduction clauses.
+ SmallVector<Expr *, 8> TaskgroupDescriptors;
/// List of captures for clause.
SmallVector<Decl *, 4> ExprCaptures;
/// List of postupdate expressions.
LHSs.reserve(Size);
RHSs.reserve(Size);
ReductionOps.reserve(Size);
+ TaskgroupDescriptors.reserve(Size);
ExprCaptures.reserve(Size);
ExprPostUpdates.reserve(Size);
}
LHSs.emplace_back(nullptr);
RHSs.emplace_back(nullptr);
ReductionOps.emplace_back(ReductionOp);
+ TaskgroupDescriptors.emplace_back(nullptr);
}
/// Stores reduction data.
- void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS,
- Expr *ReductionOp) {
+ void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, Expr *ReductionOp,
+ Expr *TaskgroupDescriptor) {
Vars.emplace_back(Item);
Privates.emplace_back(Private);
LHSs.emplace_back(LHS);
RHSs.emplace_back(RHS);
ReductionOps.emplace_back(ReductionOp);
+ TaskgroupDescriptors.emplace_back(TaskgroupDescriptor);
}
};
} // namespace
if (!D)
continue;
+ Expr *TaskgroupDescriptor = nullptr;
QualType Type;
auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr->IgnoreParens());
auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr->IgnoreParens());
SourceRange ParentSR;
BinaryOperatorKind ParentBOK;
const Expr *ParentReductionOp;
+ Expr *ParentBOKTD, *ParentReductionOpTD;
DSAStackTy::DSAVarData ParentBOKDSA =
- Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK);
+ Stack->getTopMostTaskgroupReductionData(D, ParentSR, ParentBOK,
+ ParentBOKTD);
DSAStackTy::DSAVarData ParentReductionOpDSA =
- Stack->getTopMostTaskgroupReductionData(D, ParentSR,
- ParentReductionOp);
+ Stack->getTopMostTaskgroupReductionData(
+ D, ParentSR, ParentReductionOp, ParentReductionOpTD);
bool IsParentBOK = ParentBOKDSA.DKind != OMPD_unknown;
bool IsParentReductionOp = ParentReductionOpDSA.DKind != OMPD_unknown;
if (!IsParentBOK && !IsParentReductionOp) {
continue;
}
}
+ TaskgroupDescriptor = IsParentBOK ? ParentBOKTD : ParentReductionOpTD;
+ assert(TaskgroupDescriptor && "Taskgroup descriptor must be defined.");
}
DeclRefExpr *Ref = nullptr;
else
Stack->addTaskgroupReductionData(D, ReductionIdRange, BOK);
}
- RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get());
+ RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get(),
+ TaskgroupDescriptor);
}
return RD.Vars.empty();
}
return OMPInReductionClause::Create(
Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars,
ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId,
- RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps,
+ RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, RD.TaskgroupDescriptors,
buildPreInits(Context, RD.ExprCaptures),
buildPostUpdate(*this, RD.ExprPostUpdates));
}
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Reader->Record.readSubExpr());
C->setReductionOps(Vars);
+ Vars.clear();
+ for (unsigned I = 0; I != NumVars; ++I)
+ Vars.push_back(Reader->Record.readSubExpr());
+ C->setTaskgroupDescriptors(Vars);
}
void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {
Record.AddStmt(E);
for (auto *E : C->reduction_ops())
Record.AddStmt(E);
+ for (auto *E : C->taskgroup_descriptors())
+ Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) {
--- /dev/null
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
+
+struct S {
+ int a;
+ S() : a(0) {}
+ S(const S&) {}
+ S& operator=(const S&) {return *this;}
+ ~S() {}
+ friend S operator+(const S&a, const S&b) {return a;}
+};
+
+
+int main(int argc, char **argv) {
+ int a;
+ float b;
+ S c[5];
+ short d[argc];
+#pragma omp taskgroup task_reduction(+: a, b, argc)
+ {
+#pragma omp taskgroup task_reduction(-:c, d)
+#pragma omp parallel
+#pragma omp task in_reduction(+:a) in_reduction(-:d)
+ a += d[a];
+ }
+ return 0;
+}
+
+// CHECK-LABEL: @main
+// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
+// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
+// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
+// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+
+// CHECK: define internal void [[OMP_PARALLEL]](
+// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
+// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
+// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
+// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
+// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
+// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
+// CHECK-NEXT: call i32 @__kmpc_omp_task(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]])
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+
+// CHECK: define internal {{.*}} [[OMP_TASK]](
+// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
+// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
+// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
+// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
+// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
+// CHECK: add nsw i32
+// CHECK: store i32 %
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
+
+struct S {
+ int a;
+ S() : a(0) {}
+ S(const S&) {}
+ S& operator=(const S&) {return *this;}
+ ~S() {}
+ friend S operator+(const S&a, const S&b) {return a;}
+};
+
+
+int main(int argc, char **argv) {
+ int a;
+ float b;
+ S c[5];
+ short d[argc];
+#pragma omp taskgroup task_reduction(+: a, b, argc)
+ {
+#pragma omp taskgroup task_reduction(-:c, d)
+#pragma omp parallel
+#pragma omp taskloop in_reduction(+:a) in_reduction(-:d)
+ for (int i = 0; i < 5; ++i)
+ a += d[a];
+ }
+ return 0;
+}
+
+// CHECK-LABEL: @main
+// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
+// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
+// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
+// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+
+// CHECK: define internal void [[OMP_PARALLEL]](
+// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
+// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
+// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
+// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
+// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
+// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1,
+// CHECK: ret void
+// CHECK-NEXT: }
+
+// CHECK: define internal {{.*}} [[OMP_TASK]](
+// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
+// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
+// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
+// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
+// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
+// CHECK: add nsw i32
+// CHECK: store i32 %
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* }
+
+struct S {
+ int a;
+ S() : a(0) {}
+ S(const S&) {}
+ S& operator=(const S&) {return *this;}
+ ~S() {}
+ friend S operator+(const S&a, const S&b) {return a;}
+};
+
+
+int main(int argc, char **argv) {
+ int a;
+ float b;
+ S c[5];
+ short d[argc];
+#pragma omp taskgroup task_reduction(+: a, b, argc)
+ {
+#pragma omp taskgroup task_reduction(-:c, d)
+#pragma omp parallel
+#pragma omp taskloop simd in_reduction(+:a) in_reduction(-:d)
+ for (int i = 0; i < 5; ++i)
+ a += d[a];
+ }
+ return 0;
+}
+
+// CHECK-LABEL: @main
+// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]])
+// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
+// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
+// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]])
+
+// CHECK: define internal void [[OMP_PARALLEL]](
+// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]*
+// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1
+// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0
+// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1
+// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]],
+// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]],
+// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1,
+// CHECK: ret void
+// CHECK-NEXT: }
+
+// CHECK: define internal {{.*}} [[OMP_TASK]](
+// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]])
+// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]],
+// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]],
+// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
+// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]],
+// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* %
+// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
+// CHECK: [[D_REF:%.+]] = getelementptr inbounds %
+// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
+// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
+// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
+// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
+// CHECK: add nsw i32
+// CHECK: store i32 %
+#endif
for (auto *E : C->reduction_ops()) {
Visitor->AddStmt(E);
}
+ for (auto *E : C->taskgroup_descriptors())
+ Visitor->AddStmt(E);
}
void OMPClauseEnqueue::VisitOMPLinearClause(const OMPLinearClause *C) {
VisitOMPClauseList(C);