///
class OMPParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel;
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a N clauses.
///
static OMPParallelDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
///
class OMPForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
-
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs,
- bool HasCancel);
+ Expr *TaskRedRef, bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
class OMPSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner directive.
///
static OMPSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
static OMPSectionsDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
class OMPParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
class OMPParallelMasterDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
+
OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc,
unsigned NumClauses)
: OMPExecutableDirective(this, OMPParallelMasterDirectiveClass,
SourceLocation(), SourceLocation(), NumClauses,
1) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
public:
/// Creates directive with a list of \a Clauses.
///
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
///
static OMPParallelMasterDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
static OMPParallelMasterDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
static bool classof(const Stmt *T) {
return T->getStmtClass() == OMPParallelMasterDirectiveClass;
}
class OMPParallelSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
static OMPParallelSectionsDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
///
class OMPTargetParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
SourceLocation(), SourceLocation(), NumClauses,
/*NumChildren=*/1) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
static OMPTargetParallelDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
class OMPTargetParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPTargetParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
///
class OMPDistributeParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
///
class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
class OMPTargetTeamsDistributeParallelForDirective final
: public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
SourceLocation(), SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
OMPParallelDirective *OMPParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *));
void *Mem =
new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses);
}
-OMPForDirective *
-OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation EndLoc, unsigned CollapsedNum,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+OMPForDirective *OMPForDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPForDirective), alignof(OMPClause *));
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPSectionsDirective *OMPSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *));
void *Mem =
new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPParallelForDirective *OMPParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *));
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *));
void *Mem =
new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
return Dir;
}
OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *));
void *Mem =
new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *));
void *Mem =
new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
OMPTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
OMPTargetTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective),
alignof(OMPClause *));
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
+ // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ OMPRTL__kmpc_taskred_modifier_init,
+ // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+ // int is_ws);
+ OMPRTL__kmpc_task_reduction_modifier_fini,
// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
OMPRTL__kmpc_alloc,
// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
- CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType()),
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
nullptr);
return;
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- auto *ElemType = cast<llvm::PointerType>(
- SharedAddresses[N].first.getPointer(CGF)->getType())
- ->getElementType();
+ auto *ElemType =
+ cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
+ ->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
- Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
- SharedAddresses[N].first.getPointer(CGF));
+ Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+ OrigAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
- SizeInChars = CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType());
+ SizeInChars =
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
Sizes.emplace_back(SizeInChars, Size);
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_taskred_modifier_init: {
+ // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num_data, void *data);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
+ CGM.IntTy, CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_taskred_modifier_init");
+ break;
+ }
+ case OMPRTL__kmpc_task_reduction_modifier_fini: {
+ // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+ // int is_ws);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy,
+ /*Name=*/"__kmpc_task_reduction_modifier_fini");
+ break;
+ }
case OMPRTL__kmpc_alloc: {
// Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
// al); omp_allocator_handle_t type is void *.
RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
- ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars,
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
Data.ReductionCopies, Data.ReductionOps);
for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
// kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
FlagsLVal.getType());
}
+ if (Data.IsReductionWithTaskMod) {
+ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {
+ IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TaskRedInput.getPointer(), CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
+ }
// Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
llvm::Value *Args[] = {
CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
Args);
}
+void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy,
+ IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true)};
+ (void)CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
+}
+
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
SmallVector<const Expr *, 4> LastprivateVars;
SmallVector<const Expr *, 4> LastprivateCopies;
SmallVector<const Expr *, 4> ReductionVars;
+ SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps;
struct DependData {
unsigned NumberOfParts = 0;
bool Tied = true;
bool Nogroup = false;
+ bool IsReductionWithTaskMod = false;
+ bool IsWorksharingReduction = false;
};
/// Class intended to support codegen of all kind of the reduction clauses.
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For reduction clause with task modifier it emits the next call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data);
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction);
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions.
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For reduction clause with task modifier it emits the next call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data) override;
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction) override;
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions + emits threadprivate variable to
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
+ OMPTaskDataTy Data;
+ SmallVector<const Expr *, 4> TaskLHSs;
+ SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Shareds.emplace_back(Ref);
- Privates.emplace_back(*IPriv);
- ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ if (C->getModifier() == OMPC_REDUCTION_task) {
+ Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
++IPriv;
++Count;
}
+ if (!Data.ReductionVars.empty()) {
+ Data.IsReductionWithTaskMod = true;
+ Data.IsWorksharingReduction =
+ isOpenMPWorksharingDirective(D.getDirectiveKind());
+ llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+ const Expr *TaskRedRef = nullptr;
+ switch (D.getDirectiveKind()) {
+ case OMPD_parallel:
+ TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_for:
+ TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_sections:
+ TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_for:
+ TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_master:
+ TaskRedRef =
+ cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_sections:
+ TaskRedRef =
+ cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel:
+ TaskRedRef =
+ cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel_for:
+ TaskRedRef =
+ cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_distribute_parallel_for:
+ TaskRedRef =
+ cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_target_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_simd:
+ case OMPD_for_simd:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_parallel_for_simd:
+ case OMPD_task:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_teams:
+ case OMPD_target:
+ case OMPD_cancellation_point:
+ case OMPD_cancel:
+ case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
+ case OMPD_distribute:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_declare_simd:
+ case OMPD_requires:
+ case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
+ case OMPD_unknown:
+ llvm_unreachable("Enexpected directive with task reductions.");
+ }
+
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+ EmitVarDecl(*VD);
+ EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+ /*Volatile=*/false, TaskRedRef->getType());
+ }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
+ bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ IsReductionWithTaskMod =
+ IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
+ if (IsReductionWithTaskMod) {
+ CGM.getOpenMPRuntime().emitTaskReductionFini(
+ *this, D.getBeginLoc(),
+ isOpenMPWorksharingDirective(D.getDirectiveKind()));
+ }
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR,
const Expr *&ReductionRef,
Expr *&TaskgroupDescriptor) const;
- /// Return reduction reference expression for the current taskgroup.
+ /// Return reduction reference expression for the current taskgroup or
+ /// parallel/worksharing directives with task reductions.
Expr *getTaskgroupReductionRef() const {
- assert(getTopOfStack().Directive == OMPD_taskgroup &&
- "taskgroup reference expression requested for non taskgroup "
- "directive.");
+ assert((getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
+ "taskgroup reference expression requested for non taskgroup or "
+ "parallel/worksharing directive.");
return getTopOfStack().TaskgroupReductionRef;
}
/// Checks if the given \p VD declaration is actually a taskgroup reduction
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
- getTopOfStack().Directive == OMPD_taskgroup &&
+ (getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(BOK, SR);
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
- getTopOfStack().Directive == OMPD_taskgroup &&
+ (getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(ReductionRef, SR);
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
- if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+ if (Data.Attributes != OMPC_reduction ||
+ Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
- return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
- Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+ return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+ Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
- if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+ if (Data.Attributes != OMPC_reduction ||
+ Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
- return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
- Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+ return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+ Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
// Consider taskgroup reduction descriptor variable a private
// to avoid possible capture in the region.
(DSAStack->hasExplicitDirective(
- [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
+ [](OpenMPDirectiveKind K) {
+ return K == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(K) ||
+ isOpenMPWorksharingDirective(K)) &&
+ !isOpenMPSimdDirective(K));
+ },
Level) &&
DSAStack->isTaskgroupReductionRef(D, Level)))
? OMPC_private
SmallVector<const OMPClauseWithPreInit *, 4> PICs;
// This is required for proper codegen.
for (OMPClause *Clause : Clauses) {
- if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
+ if (!LangOpts.OpenMPSimd &&
+ isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
Clause->getClauseKind() == OMPC_in_reduction) {
// Capture taskgroup task_reduction descriptors inside the tasking regions
// with the corresponding in_reduction items.
setFunctionHasBranchProtectedScope();
return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
}
setFunctionHasBranchProtectedScope();
- return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
- Clauses, AStmt, B, DSAStack->isCancelRegion());
+ return OMPForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPForSimdDirective(
setFunctionHasBranchProtectedScope();
return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
}
setFunctionHasBranchProtectedScope();
- return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
- NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ return OMPParallelForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
setFunctionHasBranchProtectedScope();
- return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses,
- AStmt);
+ return OMPParallelMasterDirective::Create(
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef());
}
StmtResult
setFunctionHasBranchProtectedScope();
return OMPParallelSectionsDirective::Create(
- Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion());
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// detach and mergeable clauses are mutially exclusive, check for it.
setFunctionHasBranchProtectedScope();
- return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses,
- AStmt, DSAStack->isCancelRegion());
+ return OMPTargetParallelDirective::Create(
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
}
setFunctionHasBranchProtectedScope();
- return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc,
- NestedLoopCount, Clauses, AStmt,
- B, DSAStack->isCancelRegion());
+ return OMPTargetParallelForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// Check for existence of a map clause in the list of clauses.
setFunctionHasBranchProtectedScope();
return OMPDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
return OMPTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPTargetTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
}
// All reduction items are still marked as reduction (to do not increase
// code base size).
- Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref,
- RD.RedModifier);
- if (CurrDir == OMPD_taskgroup) {
+ unsigned Modifier = RD.RedModifier;
+ // Consider task_reductions as reductions with task modifier. Required for
+ // correct analysis of in_reduction clauses.
+ if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction)
+ Modifier = OMPC_REDUCTION_task;
+ Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier);
+ if (Modifier == OMPC_REDUCTION_task &&
+ (CurrDir == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(CurrDir) ||
+ isOpenMPWorksharingDirective(CurrDir)) &&
+ !isOpenMPSimdDirective(CurrDir)))) {
if (DeclareReductionRef.isUsable())
Stack->addTaskgroupReductionData(D, ReductionIdRange,
DeclareReductionRef.get());
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
}
void ASTStmtReader::VisitOMPParallelSectionsDirective(
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
VisitStmt(D);
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readBool());
}
void ASTStmtReader::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
void ASTStmtReader::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_FOR_DIRECTIVE;
}
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE;
}
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE;
}
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE;
}
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.writeBool(D->hasCancel());
Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams
+#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif