[OPENMP] Codegen for 'firstprivate' clause in 'task' directive.
authorAlexey Bataev <a.bataev@hotmail.com>
Tue, 5 May 2015 04:05:12 +0000 (04:05 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Tue, 5 May 2015 04:05:12 +0000 (04:05 +0000)
For tasks codegen for private/firstprivate variables are different rather than for other directives.

1. Build an internal structure of privates for each private variable:
struct .kmp_privates_t. {
  Ty1 var1;
  ...
  Tyn varn;
};
2. Add a new field to kmp_task_t type with list of privates.
struct kmp_task_t {
  void *              shareds;
  kmp_routine_entry_t routine;
  kmp_int32           part_id;
  kmp_routine_entry_t destructors;
  .kmp_privates_t.    privates;
};
3. Create a function with destructors calls for all privates after end of task region.
kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
  ~Destructor(&tt->privates.var1);
  ...
  ~Destructor(&tt->privates.varn);
  return 0;
}
4. Perform initialization of all firstprivate fields (by simple copying for POD data, copy constructor calls for classes) + provide address of a destructor function after kmpc_omp_task_alloc() and before kmpc_omp_task() calls.
kmp_task_t *new_task = __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t *task_entry);

CopyConstructor(new_task->privates.var1, *new_task->shareds.var1_ref);
new_task->shareds.var1_ref = &new_task->privates.var1;
...
CopyConstructor(new_task->privates.varn, *new_task->shareds.varn_ref);
new_task->shareds.varn_ref = &new_task->privates.varn;

new_task->destructors = .omp_task_destructor.;
kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *new_task)
Differential Revision: http://reviews.llvm.org/D9370

llvm-svn: 236479

clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/task_firstprivate_codegen.cpp [new file with mode: 0644]

index b89f0c0..a0c02ce 100644 (file)
@@ -1630,12 +1630,21 @@ static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
 }
 
 namespace {
-typedef std::pair<CharUnits /*Align*/,
-                  std::pair<const VarDecl *, const VarDecl *>> VDPair;
+struct PrivateHelpersTy {
+  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
+                   const VarDecl *PrivateElemInit)
+      : Original(Original), PrivateCopy(PrivateCopy),
+        PrivateElemInit(PrivateElemInit) {}
+  const VarDecl *Original;
+  const VarDecl *PrivateCopy;
+  const VarDecl *PrivateElemInit;
+};
+typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
 } // namespace
 
-static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
-                                            const ArrayRef<VDPair> Privates) {
+static RecordDecl *
+createPrivatesRecordDecl(CodeGenModule &CGM,
+                         const ArrayRef<PrivateDataTy> Privates) {
   if (!Privates.empty()) {
     auto &C = CGM.getContext();
     // Build struct .kmp_privates_t. {
@@ -1644,8 +1653,8 @@ static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
     RD->startDefinition();
     for (auto &&Pair : Privates) {
-      addFieldToRecordDecl(C, RD,
-                           Pair.second.first->getType().getNonReferenceType());
+      addFieldToRecordDecl(
+          C, RD, Pair.second.Original->getType().getNonReferenceType());
     }
     // TODO: add firstprivate fields.
     RD->completeDefinition();
@@ -1654,10 +1663,10 @@ static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
   return nullptr;
 }
 
-static RecordDecl *createKmpTaskTRecordDecl(CodeGenModule &CGM,
-                                            QualType KmpInt32Ty,
-                                            QualType KmpRoutineEntryPointerQTy,
-                                            const ArrayRef<VDPair> Privates) {
+static RecordDecl *
+createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
+                         QualType KmpRoutineEntryPointerQTy,
+                         const ArrayRef<PrivateDataTy> Privates) {
   auto &C = CGM.getContext();
   // Build struct kmp_task_t {
   //         void *              shareds;
@@ -1782,7 +1791,8 @@ emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
   return DestructorFn;
 }
 
-static int array_pod_sort_comparator(const VDPair *P1, const VDPair *P2) {
+static int array_pod_sort_comparator(const PrivateDataTy *P1,
+                                     const PrivateDataTy *P2) {
   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
 }
 
@@ -1791,18 +1801,33 @@ void CGOpenMPRuntime::emitTaskCall(
     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
     const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
-    const ArrayRef<const Expr *> PrivateCopies) {
+    const ArrayRef<const Expr *> PrivateCopies,
+    const ArrayRef<const Expr *> FirstprivateVars,
+    const ArrayRef<const Expr *> FirstprivateCopies,
+    const ArrayRef<const Expr *> FirstprivateInits) {
   auto &C = CGM.getContext();
-  llvm::SmallVector<VDPair, 8> Privates;
-  auto I = PrivateCopies.begin();
+  llvm::SmallVector<PrivateDataTy, 8> Privates;
   // Aggeregate privates and sort them by the alignment.
+  auto I = PrivateCopies.begin();
   for (auto *E : PrivateVars) {
     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
     Privates.push_back(std::make_pair(
         C.getTypeAlignInChars(VD->getType()),
-        std::make_pair(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()))));
+        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+                         /*PrivateElemInit=*/nullptr)));
     ++I;
   }
+  I = FirstprivateCopies.begin();
+  auto IElemInitRef = FirstprivateInits.begin();
+  for (auto *E : FirstprivateVars) {
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+    Privates.push_back(std::make_pair(
+        C.getTypeAlignInChars(VD->getType()),
+        PrivateHelpersTy(
+            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
+    ++I, ++IElemInitRef;
+  }
   llvm::array_pod_sort(Privates.begin(), Privates.end(),
                        array_pod_sort_comparator);
   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
@@ -1872,11 +1897,61 @@ void CGOpenMPRuntime::emitTaskCall(
     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
         cast<CapturedStmt>(*D.getAssociatedStmt()));
     for (auto &&Pair : Privates) {
-      auto *VD = Pair.second.second;
+      auto *VD = Pair.second.PrivateCopy;
       auto *Init = VD->getAnyInitializer();
       LValue PrivateLValue = CGF.EmitLValueForField(Base, *FI);
       if (Init) {
-        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
+        if (auto *Elem = Pair.second.PrivateElemInit) {
+          auto *OriginalVD = Pair.second.Original;
+          auto *SharedField = CapturesInfo.lookup(OriginalVD);
+          auto SharedRefLValue =
+              CGF.EmitLValueForField(SharedsBase, SharedField);
+          if (OriginalVD->getType()->isArrayType()) {
+            // Initialize firstprivate array.
+            if (!isa<CXXConstructExpr>(Init) ||
+                CGF.isTrivialInitializer(Init)) {
+              // Perform simple memcpy.
+              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
+                                      SharedRefLValue.getAddress(),
+                                      OriginalVD->getType());
+            } else {
+              // Initialize firstprivate array using element-by-element
+              // intialization.
+              CGF.EmitOMPAggregateAssign(
+                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
+                  OriginalVD->getType(),
+                  [&CGF, Elem, Init, &CapturesInfo](llvm::Value *DestElement,
+                                                    llvm::Value *SrcElement) {
+                    // Clean up any temporaries needed by the initialization.
+                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
+                    InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
+                      return SrcElement;
+                    });
+                    (void)InitScope.Privatize();
+                    // Emit initialization for single element.
+                    auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
+                    CGF.CapturedStmtInfo = &CapturesInfo;
+                    CGF.EmitAnyExprToMem(Init, DestElement,
+                                         Init->getType().getQualifiers(),
+                                         /*IsInitializer=*/false);
+                    CGF.CapturedStmtInfo = OldCapturedStmtInfo;
+                  });
+            }
+          } else {
+            CodeGenFunction::OMPPrivateScope InitScope(CGF);
+            InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
+              return SharedRefLValue.getAddress();
+            });
+            (void)InitScope.Privatize();
+            auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
+            CGF.CapturedStmtInfo = &CapturesInfo;
+            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
+                               /*capturedByInit=*/false);
+            CGF.CapturedStmtInfo = OldCapturedStmtInfo;
+          }
+        } else {
+          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
+        }
       }
       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
       // Copy addresses of privates to corresponding references in the list of
@@ -1884,7 +1959,7 @@ void CGOpenMPRuntime::emitTaskCall(
       //   ...
       //   tt->shareds.var_addr = &tt->privates.private_var;
       //   ...
-      auto *OriginalVD = Pair.second.first;
+      auto *OriginalVD = Pair.second.Original;
       auto *SharedField = CapturesInfo.lookup(OriginalVD);
       auto SharedRefLValue =
           CGF.EmitLValueForFieldInitialization(SharedsBase, SharedField);
index 7c80562..ea714d7 100644 (file)
@@ -557,14 +557,24 @@ public:
   /// \param PrivateVars List of references to private variables for the task
   /// directive.
   /// \param PrivateCopies List of private copies for each private variable in
-  /// \a PrivateVars.
+  /// \p PrivateVars.
+  /// \param FirstprivateVars List of references to private variables for the
+  /// task directive.
+  /// \param FirstprivateCopies List of private copies for each private variable
+  /// in \p FirstprivateVars.
+  /// \param FirstprivateInits List of references to auto generated variables
+  /// used for initialization of a single array element. Used if firstprivate
+  /// variable is of array type.
   virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                             const OMPExecutableDirective &D, bool Tied,
                             llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
                             llvm::Value *TaskFunction, QualType SharedsTy,
                             llvm::Value *Shareds, const Expr *IfCond,
                             const ArrayRef<const Expr *> PrivateVars,
-                            const ArrayRef<const Expr *> PrivateCopies);
+                            const ArrayRef<const Expr *> PrivateCopies,
+                            const ArrayRef<const Expr *> FirstprivateVars,
+                            const ArrayRef<const Expr *> FirstprivateCopies,
+                            const ArrayRef<const Expr *> FirstprivateInits);
 
   /// \brief Emit code for the directive that does not require outlining.
   ///
index 2c2d703..1f06e24 100644 (file)
@@ -1373,10 +1373,10 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
   if (auto C = S.getSingleClause(OMPC_if)) {
     IfCond = cast<OMPIfClause>(C)->getCondition();
   }
+  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
   // Get list of private variables.
   llvm::SmallVector<const Expr *, 8> Privates;
   llvm::SmallVector<const Expr *, 8> PrivateCopies;
-  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
   for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) {
     auto *C = cast<OMPPrivateClause>(*I);
     auto IRef = C->varlist_begin();
@@ -1389,9 +1389,29 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
       ++IRef;
     }
   }
-  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, Tied, Final,
-                                      OutlinedFn, SharedsTy, CapturedStruct,
-                                      IfCond, Privates, PrivateCopies);
+  EmittedAsPrivate.clear();
+  // Get list of firstprivate variables.
+  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
+  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
+  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
+  for (auto &&I = S.getClausesOfKind(OMPC_firstprivate); I; ++I) {
+    auto *C = cast<OMPFirstprivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    auto IElemInitRef = C->inits().begin();
+    for (auto *IInit : C->private_copies()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        FirstprivateVars.push_back(*IRef);
+        FirstprivateCopies.push_back(IInit);
+        FirstprivateInits.push_back(*IElemInitRef);
+      }
+      ++IRef, ++IElemInitRef;
+    }
+  }
+  CGM.getOpenMPRuntime().emitTaskCall(
+      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
+      CapturedStruct, IfCond, Privates, PrivateCopies, FirstprivateVars,
+      FirstprivateCopies, FirstprivateInits);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
diff --git a/clang/test/OpenMP/task_firstprivate_codegen.cpp b/clang/test/OpenMP/task_firstprivate_codegen.cpp
new file mode 100644 (file)
index 0000000..204a852
--- /dev/null
@@ -0,0 +1,401 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+// expected-no-diagnostics
+
+// It doesn't pass on win32.
+// REQUIRES: shell
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  S(const S &s, T t = T()) : f(s.f + t) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+volatile double g;
+
+// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
+// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* }
+// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [[S_DOUBLE_TY]], [2 x [[S_DOUBLE_TY]]], i32, [2 x i32]
+// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)*, [[PRIVATES_MAIN_TY]] }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]] }
+// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)*, [[PRIVATES_TMAIN_TY]] }
+template <typename T>
+T tmain() {
+  S<T> ttt;
+  S<T> test(ttt);
+  T t_var = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> var(3);
+#pragma omp task firstprivate(t_var, vec, s_arr, s_arr, var, var)
+  {
+    vec[0] = t_var;
+    s_arr[0] = var;
+  }
+  return T();
+}
+
+int main() {
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+  // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 4
+// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_ADDR_REF]]
+// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* [[G_REF]]
+// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]]
+// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+// LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]],
+// LAMBDA: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+// LAMBDA: ret
+#pragma omp task firstprivate(g)
+  {
+    // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]])
+    // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+    // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+    // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+    // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+    // LAMBDA: store volatile double 2.0{{.+}}, double* [[G_REF]]
+
+    // LAMBDA: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}*)
+    g = 1;
+    // LAMBDA: store volatile double 1.0{{.+}}, double* %{{.+}},
+    // LAMBDA: call void [[INNER_LAMBDA]](%
+    // LAMBDA: ret
+    [&]() {
+      g = 2;
+    }();
+  }
+  }();
+  return 0;
+#elif defined(BLOCKS)
+  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS-LABEL: @main
+  // BLOCKS: call void {{%.+}}(i8
+  ^{
+  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 4
+  // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+  // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+  // BLOCKS: [[G_REF:%.+]] = load double*, double** [[G_ADDR_REF]]
+  // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* [[G_REF]]
+  // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]]
+  // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+  // BLOCKS: store double* [[G_PRIVATE_ADDR]], double** [[G_ADDR_REF]],
+  // BLOCKS: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
+  // BLOCKS: ret
+#pragma omp task firstprivate(g)
+  {
+    // BLOCKS: define {{.+}} void {{@.+}}(i8*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store volatile double 2.0{{.+}}, double*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: ret
+
+    // BLOCKS: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}*)
+    g = 1;
+    // BLOCKS: store volatile double 1.0{{.+}}, double* %{{.+}},
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: call void {{%.+}}(i8
+    ^{
+      g = 2;
+    }();
+  }
+  }();
+  return 0;
+#else
+  S<double> ttt;
+  S<double> test(ttt);
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<double> s_arr[] = {1, 2};
+  S<double> var(3);
+#pragma omp task firstprivate(var, t_var, s_arr, vec, s_arr, var)
+  {
+    vec[0] = t_var;
+    s_arr[0] = var;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define i{{[0-9]+}} @main()
+// CHECK: alloca [[S_DOUBLE_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]],
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]])
+
+// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]],
+
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         void *              shareds;
+//         kmp_routine_entry_t routine;
+//         kmp_int32           part_id;
+//         kmp_routine_entry_t destructors;
+//         [[KMP_TASK_MAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 72, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// Also copy address of private copy to the corresponding shareds reference.
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]*
+
+// Constructors for s_arr and var.
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[VAR_REF:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]],
+// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}[[VAR_REF]],
+
+// shareds->var_addr = &kmp_task_t->privates.var;
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: store [[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]],
+
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2
+// CHECK: load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]],
+// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]],
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// shareds->s_arr_addr = &kmp_task_t->privates.s_arr;
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]],
+
+// t_var;
+// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[T_VAR_REF:%.+]] = load i{{.+}}*, i{{.+}}** [[T_VAR_ADDR_REF]],
+// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]],
+// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]],
+
+// shareds->t_var_addr = &kmp_task_t->privates.t_var;
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]],
+
+// vec;
+// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
+// shareds->vec_addr = &kmp_task_t->privates.vec;
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]],
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]],
+
+// Start task.
+// CHECK: call i32 @__kmpc_omp_task([[LOC]], i32 [[GTID]], i8* [[RES]])
+
+// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]()
+
+// No destructors must be called for private copies of s_arr and var.
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]*
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: ret
+//
+// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]*)
+
+// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]],
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]*
+
+// Privates actually are used.
+// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2
+// CHECK-DAG: getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3
+
+// CHECK: ret
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_MAIN_TY]]*)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: ret i32
+
+// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// CHECK: alloca [[S_INT_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]])
+
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]],
+
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         void *              shareds;
+//         kmp_routine_entry_t routine;
+//         kmp_int32           part_id;
+//         kmp_routine_entry_t destructors;
+//         [[KMP_TASK_TMAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 56, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
+
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]*
+
+// t_var;
+// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[T_VAR_REF:%.+]] = load i{{.+}}*, i{{.+}}** [[T_VAR_ADDR_REF]],
+// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]],
+// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]],
+
+// shareds->t_var_addr = &kmp_task_t->privates.t_var;
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: store i32* [[PRIVATE_T_VAR_REF]], i32** [[T_VAR_ADDR_REF]],
+
+// vec;
+// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
+// shareds->vec_addr = &kmp_task_t->privates.vec;
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: store [2 x i32]* [[PRIVATE_VEC_REF]], [2 x i32]** [[VEC_ADDR_REF]],
+
+// Constructors for s_arr and var.
+// a_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]],
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// shareds->s_arr_addr = &kmp_task_t->privates.s_arr;
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2
+// CHECK: store [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR_REF]],
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]],
+
+// shareds->var_addr = &kmp_task_t->privates.var;
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: store [[S_INT_TY]]* [[PRIVATE_VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR_REF]],
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]],
+
+// Start task.
+// CHECK: call i32 @__kmpc_omp_task([[LOC]], i32 [[GTID]], i8* [[RES]])
+
+// No destructors must be called for private copies of s_arr and var.
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: ret
+//
+// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]*)
+
+// Substitute addresses of shared variables in capture struct by address of private copies from kmp_task_t.
+// CHECK: [[SHAREDS_ADDR_REF:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_ADDR_REF]],
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]*
+
+// Privates actually are used.
+// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 2
+// CHECK-DAG: getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{.+}} 0, i{{.+}} 3
+
+// CHECK: ret
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_TMAIN_TY]]*)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+#endif
+