From b74bfc80a434b645bddeb5865c28a3bdaa5440b8 Mon Sep 17 00:00:00 2001
From: Carlo Bertolli <cbertol@us.ibm.com>
Date: Fri, 18 Mar 2016 21:43:32 +0000
Subject: [PATCH] [OPENMP] Implementation of codegen for firstprivate clause of
 target directive

This patch implements the following aspects:

It extends sema to check that a variable is not reference in both a map clause and firstprivate or private. This is needed to ensure correct functioning at codegen level, apart from being useful for the user.
It implements firstprivate for target in codegen. The implementation applies to both host and nvptx devices.
It adds regression tests for codegen of firstprivate, host and device side when using the host as device, and nvptx side.
Please note that the regression test for nvptx codegen is missing VLAs. This is because VLAs currently require saving and restoring the stack which appears not to be a supported operation by nvptx backend.

It adds a check in sema regression tests for target map, firstprivate, and private clauses.

http://reviews.llvm.org/D18203

llvm-svn: 263837
---
 clang/include/clang/Basic/DiagnosticSemaKinds.td   |   2 +
 clang/lib/CodeGen/CGOpenMPRuntime.cpp              |   1 +
 clang/lib/Sema/SemaOpenMP.cpp                      |  41 ++
 .../OpenMP/nvptx_target_firstprivate_codegen.cpp   | 235 ++++++++
 clang/test/OpenMP/target_firstprivate_codegen.cpp  | 595 +++++++++++++++++++++
 clang/test/OpenMP/target_firstprivate_messages.cpp |   2 +
 clang/test/OpenMP/target_map_messages.cpp          |   4 +
 clang/test/OpenMP/target_private_messages.cpp      |   2 +
 8 files changed, 882 insertions(+)
 create mode 100644 clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
 create mode 100644 clang/test/OpenMP/target_firstprivate_codegen.cpp
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eac4e74..8aeb290 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8133,6 +8133,8 @@ def err_omp_schedule_nonmonotonic_ordered : Error<
   "'schedule' clause with 'nonmonotonic' modifier cannot be specified if an 'ordered' clause is specified">;
 def err_omp_ordered_simd : Error<
   "'ordered' clause with a parameter can not be specified in '#pragma omp %0' directive">;
+def err_omp_variable_in_map_and_dsa : Error<
+  "%0 variable cannot be in a map clause in '#pragma omp %1' directive">; 
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5bf12e3..055a094 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4175,6 +4175,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
   // Emit target region as a standalone region.
   auto &&CodeGen = [&CS, &D](CodeGenFunction &CGF) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+    (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope);
     CGF.EmitOMPPrivateClause(D, PrivateScope);
     (void)PrivateScope.Privatize();
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 826f069..d58f911 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -7225,6 +7225,20 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
       continue;
     }
 
+    // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+    // A list item cannot appear in both a map clause and a data-sharing
+    // attribute clause on the same construct
+    if (DSAStack->getCurrentDirective() == OMPD_target) {
+      if(DSAStack->checkMapInfoForVar(VD, /* CurrentRegionOnly = */ true,
+                                      [&](Expr *RE) -> bool {return true;})) {
+        Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+            << getOpenMPClauseName(OMPC_private)
+            << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+        ReportOriginalDSA(*this, DSAStack, D, DVar);
+        continue;
+      }
+    }
+
     // OpenMP [2.9.3.3, Restrictions, C/C++, p.1]
     //  A variable of class type (or array thereof) that appears in a private
     //  clause requires an accessible, unambiguous default constructor for the
@@ -7456,6 +7470,19 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
           continue;
         }
       }
+      // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+      // A list item cannot appear in both a map clause and a data-sharing
+      // attribute clause on the same construct
+      if (CurrDir == OMPD_target) {
+        if(DSAStack->checkMapInfoForVar(VD, /* CurrentRegionOnly = */ true,
+                                        [&](Expr *RE) -> bool {return true;})) {
+          Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+              << getOpenMPClauseName(OMPC_firstprivate)
+              << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+          ReportOriginalDSA(*this, DSAStack, D, DVar);
+          continue;
+        }
+      }
     }
 
     // Variably modified types are not supported for tasks.
@@ -9897,6 +9924,20 @@ Sema::ActOnOpenMPMapClause(OpenMPMapClauseKind MapTypeModifier,
       continue;
     }
 
+    // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+    // A list item cannot appear in both a map clause and a data-sharing
+    // attribute clause on the same construct
+    if (DKind == OMPD_target && VD) {
+      auto DVar = DSAStack->getTopDSA(VD, false);
+      if (isOpenMPPrivate(DVar.CKind)) {
+        Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+            << getOpenMPClauseName(DVar.CKind)
+            << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+        ReportOriginalDSA(*this, DSAStack, D, DVar);
+        continue;
+      }
+    }
+
     Vars.push_back(RE);
     DSAStack->addExprToVarMapInfo(D, RE);
   }
diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
new file mode 100644
index 0000000..f7b400e
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp
@@ -0,0 +1,235 @@
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fomptargets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fomp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fomptargets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fomp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+// TCHECK:  [[TT:%.+]] = type { i64, i8 }
+// TCHECK:  [[S1:%.+]] = type { double }
+
+int foo(int n, double *ptr) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  double c[5][10];
+  TT<long long, char> d;
+  
+  #pragma omp target firstprivate(a)
+  {
+  }
+  
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]])
+  // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[A1:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+  // TCHECK-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+  // TCHECK-64:  [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]],
+  // TCHECK-32:  [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[A_ADDR_VAL]], i{{[0-9]+}}* [[A1]],
+  // TCHECK:  ret void  
+
+#pragma omp target firstprivate(aa,b,c,d)
+  {
+    aa += 1;
+    b[2] = 1.0;
+    c[1][2] = 1.0;
+    d.X = 1;
+    d.Y = 1;    
+  }
+  
+  // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
+  // target region
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
+  // TCHECK:  [[A2_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[B_ADDR:%.+]] = alloca [10 x float]*,
+  // TCHECK:  [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*,
+  // TCHECK:  [[D_ADDR:%.+]] = alloca [[TT]]*,
+  // TCHECK:  [[A2_PRIV:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[B_PRIV:%.+]] = alloca [10 x float],
+  // TCHECK:  [[C_PRIV:%.+]] = alloca [5 x [10 x double]],
+  // TCHECK:  [[D_PRIV:%.+]] = alloca [[TT]],
+  // TCHECK:  store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]],
+  // TCHECK:  store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]],
+  // TCHECK:  store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]],
+  // TCHECK:  store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]],
+  // TCHECK:  [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}*
+  // TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]],
+  // TCHECK:  [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]],
+  // TCHECK:  [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]],
+
+  // firstprivate(aa): a_priv = a_in
+  // TCHECK:  [[A2_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV_A2ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[A2_CONV_VAL]], i{{[0-9]+}}* [[A2_PRIV]],
+
+  //  firstprivate(b): memcpy(b_priv,b_in)
+  // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8*
+  // TCHECK:  [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}})
+
+  // firstprivate(c)
+  // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
+  // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+  
+  // firstprivate(d)
+  // TCHECK:  [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
+  // TCHECK:  [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}})
+
+  
+  #pragma omp target firstprivate(ptr)
+  {
+    ptr[0]++;
+  }
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]])
+  // TCHECK:  [[PTR_ADDR:%.+]] = alloca double*,
+  // TCHECK:  [[PTR_PRIV:%.+]] = alloca double*,
+  // TCHECK:  store double* [[PTR_IN]], double** [[PTR_ADDR]],
+  // TCHECK:  [[PTR_IN_REF:%.+]] = load double*, double** [[PTR_ADDR]],
+  // TCHECK:  store double* [[PTR_IN_REF]], double** [[PTR_PRIV]],
+
+  return a;
+}
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  tx b[10];
+
+#pragma omp target firstprivate(a,b)
+  {
+    a += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target firstprivate(a,aaa,b)
+  {
+    a += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[A3_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK:  [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[A3_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
+// TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]],
+// TCHECK:  store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
+// TCHECK-64:  [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+// TCHECK:  [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8*
+// TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
+
+// firstprivate(a): a_priv = a_in
+// TCHECK-64:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_CONV]],
+// TCHECK-32:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]],
+
+// firstprivate(aaa)
+// TCHECK:  [[A3_IN_VAL:%.+]] = load i8, i8* [[A3_CONV]],
+// TCHECK:  store i{{[0-9]+}} [[A3_IN_VAL]], i{{[0-9]+}}* [[A3_PRIV]],
+
+// firstprivate(b)
+// TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
+// TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+
+// TCHECK:  ret void
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+
+#pragma omp target firstprivate(b)
+    {
+      this->a = (double)b + 1.5;
+    }
+
+    return (int)b;
+  }
+
+  // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]])
+  // TCHECK:  [[TH_ADDR:%.+]] = alloca [[S1]]*,
+  // TCHECK:  [[B_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+  // TCHECK:  store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]],
+  // TCHECK:  [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
+  // TCHECK-64:  [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}*
+
+  // firstprivate(b)
+  // TCHECK-64:  [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR_CONV]],
+  // TCHECK-32:  [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[B_IN_VAL]], i{{[0-9]+}}* [[B_PRIV]], 
+
+  // TCHECK: ret void
+};
+
+
+
+int bar(int n, double *ptr){
+  int a = 0;
+  a += foo(n, ptr);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// template
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK:  [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
+// TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
+// TCHECK-64:  [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+// TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
+
+// firstprivate(a)
+// TCHECK-64:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR_CONV]]
+// TCHECK-32:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]]
+// TCHECK:  store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]],
+
+// firstprivate(b)
+// TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
+// TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+
+// TCHECK: ret void
+
+#endif
diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp
new file mode 100644
index 0000000..c99ad29
--- /dev/null
+++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp
@@ -0,0 +1,595 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fomp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fomp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fomptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fomp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fomp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fomp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fomptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fomp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+// CHECK:  [[TT:%.+]] = type { i64, i8 }
+// CHECK:  [[S1:%.+]] = type { double }
+
+// TCHECK:  [[TT:%.+]] = type { i64, i8 }
+// TCHECK:  [[S1:%.+]] = type { double }
+
+// CHECK-DAG:  [[SIZET:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 4]
+// CHECK:  [[MAPT:@.+]] = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG:  [[MAPT2:@.+]] = private unnamed_addr constant [9 x i32] [i32 128, i32 3, i32 128, i32 3, i32 3, i32 128, i32 128, i32 3, i32 3]
+// CHECK-64-DAG:  [[SIZET3:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ]] 8]
+// CHECK-32-DAG:  [[SIZET3:@.+]] = private unnamed_addr constant [1 x i32] [i[[SZ]] 4]
+// CHECK-DAG:  [[MAPT3:@.+]] = private unnamed_addr constant [1 x i32] [i32 160]
+// CHECK-DAG:  [[MAPT4:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 128, i32 128, i32 128, i32 3]
+// CHECK-DAG:  [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40]
+// CHECK-DAG:  [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 128, i32 128, i32 3]
+// CHECK-DAG:  [[SIZET6:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 40]
+// CHECK-DAG:  [[MAPT6:@.+]] = private unnamed_addr constant [2 x i32] [i32 128, i32 3]
+
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n, double *ptr) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long long, char> d;
+  
+  #pragma omp target firstprivate(a)
+  {
+  }
+
+  // a is passed by value to tgt_target
+  // CHECK:  [[N_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // CHECK:  [[PTR_ADDR:%.+]] = alloca double*,
+  // CHECK:  [[A:%.+]] = alloca i{{[0-9]+}},
+  // CHECK:  [[A2:%.+]] = alloca i{{[0-9]+}},
+  // CHECK:  [[B:%.+]] = alloca [10 x float],
+  // CHECK:  [[SSTACK:%.+]] = alloca i8*,
+  // CHECK:  [[C:%.+]] = alloca [5 x [10 x double]],
+  // CHECK:  [[D:%.+]] = alloca [[TT]],
+  // CHECK:  [[ACAST:%.+]] = alloca i{{[0-9]+}},
+  // CHECK:  {{.+}} = alloca i{{[0-9]+}},
+  // CHECK:  [[BASE_PTR_ARR:%.+]] = alloca [1 x i8*],
+  // CHECK:  [[PTR_ARR:%.+]] = alloca [1 x i8*],
+  // CHECK:  [[A2CAST:%.+]] = alloca i{{[0-9]+}},
+  // CHECK:  [[BASE_PTR_ARR2:%.+]] = alloca [9 x i8*],
+  // CHECK:  [[PTR_ARR2:%.+]] = alloca [9 x i8*],
+  // CHECK:  [[SIZET2:%.+]] = alloca [9 x i{{[0-9]+}}],
+  // CHECK:  [[BASE_PTR_ARR3:%.+]] = alloca [1 x i8*],
+  // CHECK:  [[PTR_ARR3:%.+]] = alloca [1 x i8*],  
+  // CHECK:  [[N_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]],
+  // CHECK-64:  [[N_EXT:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL]] to i{{[0-9]+}}
+  // CHECK:  [[SSAVE_RET:%.+]] = call i8* @llvm.stacksave()
+  // CHECK:  store i8* [[SSAVE_RET]], i8** [[SSTACK]],
+  // CHECK-64:  [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_EXT]],
+  // CHECK-32:  [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_ADDR_VAL]],  
+  // CHECK:  [[N_ADDR_VAL2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]],
+  // CHECK-64:  [[N_EXT2:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL2]] to i{{[0-9]+}}
+  // CHECK-64:  [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]]
+  // CHECK-32:  [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]]
+  // CHECK:  [[CN_VLA:%.+]] = alloca double, i{{[0-9]+}} [[CN_SIZE]],
+  // CHECK:  [[AVAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A]],
+  // CHECK-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ACAST]] to i{{[0-9]+}}*
+  // CHECK-64:  store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[CONV]],
+  // CHECK-32:  store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[ACAST]],
+  // CHECK:  [[ACAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ACAST]],
+  // CHECK:  [[ACAST_TOPTR:%.+]] = inttoptr i{{[0-9]+}} [[ACAST_VAL]] to i8*
+  // CHECK:  [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[ACAST_TOPTR]], i8** [[BASE_PTR_GEP]],
+  // CHECK:  [[ACAST_TOPTR2:%.+]] = inttoptr i{{[0-9]+}} [[ACAST_VAL]] to i8*
+  // CHECK:  [[PTR_GEP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[ACAST_TOPTR2]], i8** [[PTR_GEP]],
+  // CHECK:  [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT]], i32 0, i32 0))
+  
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]])
+  // TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[A1:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+  // TCHECK-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+  // TCHECK-64:  [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]],
+  // TCHECK-32:  [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]],
+  // TCHECK: store i{{[0-9]+}} [[A_ADDR_VAL]], i{{[0-9]+}}* [[A1]],
+  // TCHECK:  ret void  
+
+#pragma omp target firstprivate(aa,b,bn,c,cn,d)
+  {
+    aa += 1;
+    b[2] = 1.0;
+    bn[3] = 1.0;
+    c[1][2] = 1.0;
+    cn[1][3] = 1.0;
+    d.X = 1;
+    d.Y = 1;    
+  }
+
+  // CHECK:  [[A2VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2]],
+  // CHECK:  [[A2CASTCONV:%.+]] = bitcast i{{[0-9]+}}* [[A2CAST]] to i{{[0-9]+}}*
+  // CHECK:  store i{{[0-9]+}} [[A2VAL]], i{{[0-9]+}}* [[A2CASTCONV]],
+  // CHECK:  [[A2CAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2CAST]],
+  // CHECK-64:  [[BN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_EXT]], 4
+  // CHECK-32:  [[BN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_ADDR_VAL]], 4  
+  // CHECK-64:  [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]]
+  // CHECK-32:  [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]]
+  // CHECK:  [[CN_SIZE_2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SIZE_1]], 8
+
+  // firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short)
+  // CHECK:  [[A2CAST_TO_INT:%.+]] = inttoptr i{{[0-9]+}} [[A2CAST_VAL]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[A2CAST_TO_INT]], i8** [[BASE_PTR_GEP2_0]],
+  // CHECK:  [[A2CAST_TO_INT_2:%.+]] = inttoptr i{{[0-9]+}} [[A2CAST_VAL]] to i8*
+  // CHECK:  [[PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[A2CAST_TO_INT_2]], i8** [[PTR_GEP2_0]],
+  // CHECK:  [[SIZE_GEPA2:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIZE_GEPA2]],
+
+  // firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)
+  // CHECK:  [[BCAST:%.+]] = bitcast [10 x float]* [[B]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[BCAST]], i8** [[BASE_PTR_GEP2_1]],
+  // CHECK:  [[BCAST2:%.+]] = bitcast [10 x float]* [[B]] to i8*
+  // CHECK:  [[PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[BCAST2]], i8** [[PTR_GEP2_1]],
+  // CHECK:  [[SIZE_GEPB:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i{{[0-9]+}} 40, i{{[0-9]+}}* [[SIZE_GEPB]],
+
+  // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8 ; (2) base_ptr = &c[0], ptr = &c[0], size = n*sizeof(float)
+  // CHECK-64:  [[N_EXT3_1:%.+]] = inttoptr i{{[0-9]+}} [[N_EXT]] to i8*
+  // CHECK-32:  [[N_EXT3_1:%.+]] = inttoptr i{{[0-9]+}} [[N_ADDR_VAL]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* [[N_EXT3_1]], i8** [[BASE_PTR_GEP2_2]],
+  // CHECK-64:  [[N_EXT3_2:%.+]] = inttoptr i{{[0-9]+}} [[N_EXT]] to i8*
+  // CHECK-32:  [[N_EXT3_2:%.+]] = inttoptr i{{[0-9]+}} [[N_ADDR_VAL]] to i8*
+  // CHECK:  [[PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* [[N_EXT3_2]], i8** [[PTR_GEP2_2]],
+  // CHECK:  [[SIZE_GEPBN_1:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPBN_1]],
+  // CHECK:  [[VLABN_BCAST:%.+]] = bitcast float* [[BN_VLA]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+  // CHECK:  store i8* [[VLABN_BCAST]], i8** [[BASE_PTR_GEP2_3]],
+  // CHECK: [[SIZE_GEPBN_3:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+  // CHECK:  store i{{[0-9]+}} [[BN_SIZE]], i{{[0-9]+}}* [[SIZE_GEPBN_3]]
+  
+  // firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double))
+  // CHECK:  [[C_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i8* [[C_BCAST]], i8** [[BASE_PTR_GEP2_4]],
+  // CHECK:  [[C_BCAST2:%.+]] = bitcast [5 x [10 x double]]* [[C]] to i8*
+  // CHECK:  [[PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i8* [[C_BCAST2]], i8** [[PTR_GEP2_4]],
+  // CHECK:  [[SIZE_GEPC_4:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i{{[0-9]+}} 400, i{{[0-9]+}}* [[SIZE_GEPC_4]],
+  
+  // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) (1) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)
+  // CHECK:  [[BASE_PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
+  // CHECK:  store i8* inttoptr (i{{[0-9]+}} 5 to i8*), i8** [[BASE_PTR_GEP2_5]],
+  // CHECK:  [[PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
+  // CHECK:  store i8* inttoptr (i{{[0-9]+}} 5 to i8*), i8** [[PTR_GEP2_5]],
+  // CHECK:  [[SIZE_GEPCN_5:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
+  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPCN_5]],
+  // CHECK-64:  [[CN_SZ_2_1:%.+]] = inttoptr i{{[0-9]+}} [[N_EXT2]] to i8*
+  // CHECK-32:  [[CN_SZ_2_1:%.+]] = inttoptr i{{[0-9]+}} [[N_ADDR_VAL2]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
+  // CHECK:  store i8* [[CN_SZ_2_1]], i8** [[BASE_PTR_GEP2_6]],
+  // CHECK-64:  [[CN_SZ_2_2:%.+]] = inttoptr i{{[0-9]+}} [[N_EXT2]] to i8*
+  // CHECK-32:  [[CN_SZ_2_2:%.+]] = inttoptr i{{[0-9]+}} [[N_ADDR_VAL2]] to i8*
+  // CHECK:  [[PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
+  // CHECK:  store i8* [[CN_SZ_2_2]], i8** [[PTR_GEP2_6]],
+  // CHECK:  [[SIZE_GEPCN_6:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
+  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPCN_6]],
+  // CHECK:  [[VLA_CN_BCAST:%.+]] = bitcast double* [[CN_VLA]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
+  // CHECK:  store i8* [[VLA_CN_BCAST]], i8** [[BASE_PTR_GEP2_7]],
+  // CHECK:  [[VLA_CN_BCAST2:%.+]] = bitcast double* [[CN_VLA]] to i8*
+  // CHECK:  [[PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
+  // CHECK:  store i8* [[VLA_CN_BCAST2]], i8** [[PTR_GEP2_7]],
+  // CHECK:  [[SIZE_GEPCN_7:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
+  // CHECK:  store i{{[0-9]+}} [[CN_SIZE_2]], i{{[0-9]+}}* [[SIZE_GEPCN_7]],
+  
+  // firstprivate(d): base_ptr = &d, ptr = &d, size = 16 
+  // CHECK:  [[D_REF:%.+]] = bitcast [[TT]]* [[D]] to i8*
+  // CHECK:  [[BASE_PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
+  // CHECK:  store i8* [[D_REF]], i8** [[BASE_PTR_GEP2_8]],
+  // CHECK:  [[D_REF2:%.+]] = bitcast [[TT]]* [[D]] to i8*
+  // CHECK:  [[PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
+  // CHECK:  store i8* [[D_REF2]], i8** [[PTR_GEP2_8]],
+  // CHECK:  [[SIZE_GEPCN_8:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
+  // CHECK:  store i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}}* [[SIZE_GEPCN_8]],
+  
+  
+  // CHECK:  [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]],  i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT2]], i32 0, i32 0))
+  
+  // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
+  // target region
+  // TCHECK:  define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} [[BN_SZ:%.+]], float* {{.+}} [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} [[CN_SZ1:%.+]], i{{[0-9]+}} [[CN_SZ2:%.+]], double* {{.+}} [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
+  // TCHECK:  [[A2_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[B_ADDR:%.+]] = alloca [10 x float]*,
+  // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[BN_ADDR:%.+]] = alloca float*,
+  // TCHECK:  [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*,
+  // TCHECK:  [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[CN_ADDR:%.+]] = alloca double*,
+  // TCHECK:  [[D_ADDR:%.+]] = alloca [[TT]]*,
+  // TCHECK:  [[A2_PRIV:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[B_PRIV:%.+]] = alloca [10 x float],
+  // TCHECK:  [[SSTACK:%.+]] = alloca i8*,
+  // TCHECK:  [[C_PRIV:%.+]] = alloca [5 x [10 x double]],
+  // TCHECK:  [[D_PRIV:%.+]] = alloca [[TT]],
+  // TCHECK:  store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]],
+  // TCHECK:  store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[BN_SZ]], i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK:  store float* [[BN_IN]], float** [[BN_ADDR]],
+  // TCHECK:  store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[CN_SZ1]], i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK:  store i{{[0-9]+}} [[CN_SZ2]], i{{[0-9]+}}* [[VLA_ADDR4]],
+  // TCHECK:  store double* [[CN_IN]], double** [[CN_ADDR]],
+  // TCHECK:  store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]],
+  // TCHECK:  [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}*
+  // TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]],
+  // TCHECK:  [[BN_SZ_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK:  [[BN_ADDR_REF:%.+]] = load float*, float** [[BN_ADDR]],
+  // TCHECK:  [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]],
+  // TCHECK:  [[CN_SZ1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK:  [[CN_SZ2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR4]],
+  // TCHECK:  [[CN_ADDR_REF:%.+]] = load double*, double** [[CN_ADDR]],
+  // TCHECK:  [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]],
+
+  // firstprivate(aa): a_priv = a_in
+  // TCHECK:  [[A2_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV_A2ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[A2_CONV_VAL]], i{{[0-9]+}}* [[A2_PRIV]],
+
+  //  firstprivate(b): memcpy(b_priv,b_in)
+  // TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8*
+  // TCHECK:  [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}})
+
+  // TCHECK:  [[RET_STACK:%.+]] = call i8* @llvm.stacksave()
+  // TCHECK:  store i8* [[RET_STACK]], i8** [[SSTACK]],
+
+  // firstprivate(bn)
+  // TCHECK:  [[BN_PRIV:%.+]] = alloca float, i{{[0-9]+}} [[BN_SZ_VAL]],
+  // TCHECK:  [[BN_COPY_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[BN_SZ_VAL]], 4
+  // TCHECK:  [[BN_PRIV__BCAST:%.+]] = bitcast float* [[BN_PRIV]] to i8*
+  // TCHECK:  [[BN_REF_IN_BCAST:%.+]] = bitcast float* [[BN_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[BN_PRIV__BCAST]], i8* [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}})
+
+  // firstprivate(c)
+  // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
+  // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+  
+  // firstprivate(cn)
+  // TCHECK:  [[CN_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]]
+  // TCHECK:  [[CN_PRIV:%.+]] = alloca double, i{{[0-9]+}} [[CN_SZ]],
+  // TCHECK:  [[CN_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]]
+  // TCHECK:  [[CN_SZ2_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ2]], 8
+  // TCHECK:  [[CN_PRIV_BCAST:%.+]] = bitcast double* [[CN_PRIV]] to i8*
+  // TCHECK:  [[CN_IN_BCAST:%.+]] = bitcast double* [[CN_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[CN_PRIV_BCAST]], i8* [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}})
+  
+  // firstprivate(d)
+  // TCHECK:  [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
+  // TCHECK:  [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}})
+
+  
+  #pragma omp target firstprivate(ptr)
+  {
+    ptr[0]++;
+  }
+  // CHECK:  [[PTR_ADDR_REF:%.+]] = load double*, double** [[PTR_ADDR]],
+  // CHECK:  [[PTR_ADDR_BCAST:%.+]] = bitcast double* [[PTR_ADDR_REF]] to i8*
+
+  // CHECK:  [[BASE_PTR_GEP3_0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[PTR_ADDR_BCAST]], i8** [[BASE_PTR_GEP3_0]],
+  // CHECK:  [[PTR_ADDR_BCAST2:%.+]] = bitcast double* [[PTR_ADDR_REF]] to i8*
+  // CHECK:  [[PTR_GEP3_0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[PTR_ADDR_BCAST2]], i8** [[PTR_GEP3_0]],
+
+  // CHECK:  [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT3]], i32 0, i32 0))
+
+  // TCHECK:  define void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]])
+  // TCHECK:  [[PTR_ADDR:%.+]] = alloca double*,
+  // TCHECK:  [[PTR_PRIV:%.+]] = alloca double*,
+  // TCHECK:  store double* [[PTR_IN]], double** [[PTR_ADDR]],
+  // TCHECK:  [[PTR_IN_REF:%.+]] = load double*, double** [[PTR_ADDR]],
+  // TCHECK:  store double* [[PTR_IN_REF]], double** [[PTR_PRIV]],
+
+  return a;
+}
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  tx b[10];
+
+#pragma omp target firstprivate(a,b)
+  {
+    a += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  char aaa = 0;
+  int b[10];
+
+#pragma omp target firstprivate(a,aaa,b)
+  {
+    a += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[A3_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK:  [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[A3_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
+// TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]],
+// TCHECK:  store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
+// TCHECK-64:  [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+// TCHECK:  [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8*
+// TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
+
+// firstprivate(a): a_priv = a_in
+// TCHECK-64:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_CONV]],
+// TCHECK-32:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]],
+
+// firstprivate(aaa)
+// TCHECK:  [[A3_IN_VAL:%.+]] = load i8, i8* [[A3_CONV]],
+// TCHECK:  store i{{[0-9]+}} [[A3_IN_VAL]], i{{[0-9]+}}* [[A3_PRIV]],
+
+// firstprivate(b)
+// TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
+// TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+
+// TCHECK:  ret void
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+#pragma omp target firstprivate(b,c)
+    {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+
+  // on the host side, we first generate r1, then the static function and the template above
+  // CHECK:  define{{.+}} i32 {{.+}}([[S1]]* {{.+}}, i{{[0-9]+}} {{.+}})
+  // CHECK:  [[BASE_PTRS4:%.+]] = alloca [5 x i8*],
+  // CHECK:  [[PTRS4:%.+]] = alloca [5 x i8*],
+  // CHECK:  [[SIZET4:%.+]] = alloca [5 x i{{[0-9]+}}],
+
+  // map(this: this ptr is implicitly captured (not firstprivate matter)
+  // CHECK:  {{.+}} = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store {{.+}}, {{.+}},
+  // CHECK:  {{.+}} = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store {{.+}}, {{.+}},
+  // CHECK:  {{.+}} getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store {{.+}}, {{.+}}
+
+  // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value)
+  // CHECK:  [[B_CAST_PTR:%.+]] = inttoptr i{{[0-9]+}} [[B_CAST:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[B_CAST_PTR]], i8** [[BASE_PTRS_GEP4_1]],
+  // CHECK:  [[B_CAST_PTR2:%.+]] = inttoptr i{{[0-9]+}} [[B_CAST:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[B_CAST_PTR2]], i8** [[PTRS_GEP4_1]],
+  // CHECK:  [[SIZES_GEP4_1:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_1]],
+
+  // firstprivate(c), 3 entries: 2, n, c
+  // CHECK:  [[BASE_PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* inttoptr (i{{[0-9]+}} 2 to i8*), i8** [[BASE_PTRS_GEP4_2]],
+  // CHECK:  [[PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* inttoptr (i{{[0-9]+}} 2 to i8*), i8** [[PTRS_GEP4_2]],
+  // CHECK:  [[SIZES_GEP4_2:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK-64:  store i{{[0-9]+}} 8, i{{[0-9]+}}* [[SIZES_GEP4_2]],
+  // CHECK-32:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_2]],
+  // CHECK:  [[N_PTR:%.+]] = inttoptr i{{[0-9]+}} [[N:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+  // CHECK:  store i8* [[N_PTR]], i8** [[BASE_PTRS_GEP4_3]],
+  // CHECK:  [[N_PTR2:%.+]] = inttoptr i{{[0-9]+}} [[N:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+  // CHECK:  store i8* [[N_PTR2]], i8** [[PTRS_GEP4_3]],
+  // CHECK:  [[SIZES_GEP4_3:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+  // CHECK-64:  store i{{[0-9]+}} 8, i{{[0-9]+}}* [[SIZES_GEP4_3]],
+  // CHECK-32:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_3]],
+  // CHECK:  [[B_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[B:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i8* [[B_BCAST]], i8** [[BASE_PTRS_GEP4_4]],
+  // CHECK:  [[B_BCAST2:%.+]] = bitcast i{{[0-9]+}}* [[B:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i8* [[B_BCAST2]], i8** [[PTRS_GEP4_4]],
+  // CHECK:  [[SIZES_GEP4_4:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+  // CHECK:  store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]],
+
+  // only check that we use the map types stored in the global variable
+  // CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT4]], i32 0, i32 0))
+  
+  // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
+  // TCHECK:  [[TH_ADDR:%.+]] = alloca [[S1]]*,
+  // TCHECK:  [[B_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+  // TCHECK:  [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
+  // TCHECK:  [[SSTACK:%.+]] = alloca i8*,
+
+  // TCHECK:  store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK:  store i{{[0-9]+}}* [[C_IN]], i{{[0-9]+}}** [[C_ADDR]],
+  // TCHECK:  [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
+  // TCHECK-64:  [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}*
+  // TCHECK:  [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
+  // TCHECK:  [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
+  // TCHECK:  [[C_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]],
+
+  // firstprivate(b)
+  // TCHECK-64:  [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR_CONV]],
+  // TCHECK-32:  [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR]],
+  // TCHECK:  store i{{[0-9]+}} [[B_IN_VAL]], i{{[0-9]+}}* [[B_PRIV]],
+ 
+  // TCHECK:  [[RET_STACK:%.+]] = call i8* @llvm.stacksave()
+  // TCHECK:  store i8* [[RET_STACK:%.+]], i8** [[SSTACK]],
+
+  // firstprivate(c)
+  // TCHECK:  [[C_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]]
+  // TCHECK:  [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, i{{[0-9]+}} [[C_SZ]],
+  // TCHECK:  [[C_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]]
+  // TCHECK:  [[C_SZ_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[C_SZ2]],  2
+  // TCHECK:  [[C_PRIV_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_PRIV]] to i8*
+  // TCHECK:  [[C_IN_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_ADDR_REF]] to i8*
+  // TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}})
+
+  // finish
+  // TCHECK: [[RELOAD_SSTACK:%.+]] = load i8*, i8** [[SSTACK]],
+  // TCHECK: call void @llvm.stackrestore(i8* [[RELOAD_SSTACK]])
+  // TCHECK: ret void
+
+
+  // static host function
+  // CHECK:  define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}})
+  // CHECK:  [[BASE_PTRS5:%.+]] = alloca [3 x i8*],
+  // CHECK:  [[PTRS5:%.+]] = alloca [3 x i8*],
+
+  // firstprivate(a): by value
+  // CHECK:  [[A_CAST_PTR:%.+]] = inttoptr i{{[0-9]+}} [[A_CAST:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[A_CAST_PTR]], i8** [[BASE_PTRS_GEP5_0]],
+  // CHECK:  [[A_CAST_PTR2:%.+]] = inttoptr i{{[0-9]+}} [[A_CAST:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK:  store i8* [[A_CAST_PTR2]], i8** [[PTRS_GEP5_0]],
+
+  // firstprivate(aaa): by value
+  // CHECK:  [[A3_CAST_PTR:%.+]] = inttoptr i{{[0-9]+}} [[A3_CAST:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[A3_CAST_PTR]], i8** [[BASE_PTRS_GEP5_1]],
+  // CHECK:  [[A3_CAST_PTR2:%.+]] = inttoptr i{{[0-9]+}} [[A3_CAST:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK:  store i8* [[A3_CAST_PTR2]], i8** [[PTRS_GEP5_1]],
+
+  // firstprivate(b): base_ptr = &b[0], ptr= &b[0]
+  // CHECK:  [[B_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B:%.+]] to i8*
+  // CHECK:  [[BASE_PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* [[B_BCAST]], i8** [[BASE_PTRS_GEP5_2]],
+  // CHECK:  [[B_BCAST2:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B:%.+]] to i8*
+  // CHECK:  [[PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+  // CHECK:  store i8* [[B_BCAST2]], i8** [[PTRS_GEP5_2]],
+
+  // only check that the right sizes and map types are used
+  // CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+};
+
+
+
+int bar(int n, double *ptr){
+  int a = 0;
+  a += foo(n, ptr);
+  S1 S;
+  a += S.r1(n);
+  a += fstatic(n);
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+// template host and device
+
+// CHECK:  define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}})
+// CHECK:  [[BASE_PTRS6:%.+]] = alloca [2 x i8*],
+// CHECK:  [[PTRS6:%.+]] = alloca [2 x i8*],
+
+// firstprivate(a): by value
+// CHECK:  [[AT_CAST_PTR:%.+]] = inttoptr i{{[0-9]+}} [[AT_CAST:%.+]] to i8*
+// CHECK:  [[BASE_PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK:  store i8* [[AT_CAST_PTR]], i8** [[BASE_PTRS_GEP6_0]],
+// CHECK:  [[AT_CAST_PTR2:%.+]] = inttoptr i{{[0-9]+}} [[AT_CAST:%.+]] to i8*
+// CHECK:  [[PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK:  store i8* [[AT_CAST_PTR2]], i8** [[PTRS_GEP6_0]],
+
+// firstprivate(b): pointer
+// CHECK:  [[B_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B:%.+]] to i8*
+// CHECK:  [[BASE_PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK:  store i8* [[B_BCAST]], i8** [[BASE_PTRS_GEP6_1]],
+// CHECK:  [[B_BCAST2:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B:%.+]] to i8*
+// CHECK:  [[PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK:  store i8* [[B_BCAST2]], i8** [[PTRS_GEP6_1]],
+
+// CHECK:  call i32 @__tgt_target(i32 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT6]], i32 0, i32 0))
+
+
+// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
+// TCHECK:  [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
+// TCHECK:  [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
+// TCHECK:  [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
+// TCHECK:  store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
+// TCHECK:  store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
+// TCHECK-64:  [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
+// TCHECK:  [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
+
+// firstprivate(a)
+// TCHECK-64:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR_CONV]]
+// TCHECK-32:  [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]]
+// TCHECK:  store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]],
+
+// firstprivate(b)
+// TCHECK:  [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
+// TCHECK:  [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
+// TCHECK:  call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}})
+
+// TCHECK: ret void
+
+#endif
diff --git a/clang/test/OpenMP/target_firstprivate_messages.cpp b/clang/test/OpenMP/target_firstprivate_messages.cpp
index a375b71..6dbad67 100644
--- a/clang/test/OpenMP/target_firstprivate_messages.cpp
+++ b/clang/test/OpenMP/target_firstprivate_messages.cpp
@@ -189,6 +189,8 @@ int main(int argc, char **argv) {
   static int si;
 #pragma omp target firstprivate(si) // OK
   {}
+#pragma omp target map(i) firstprivate(i) // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target' directive}}
+  {}
   s6 = s6_0; // expected-note {{in instantiation of member function 'S6<float>::operator=' requested here}}
   s7 = s7_0; // expected-note {{in instantiation of member function 'S7<S6<float> >::operator=' requested here}}
   return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp
index 8d4df4b..baa0751 100644
--- a/clang/test/OpenMP/target_map_messages.cpp
+++ b/clang/test/OpenMP/target_map_messages.cpp
@@ -500,6 +500,10 @@ int main(int argc, char **argv) {
 #pragma omp target data map(always, tofrom: always, tofrom, x)
 #pragma omp target map(tofrom j) // expected-error {{expected ',' or ')' in 'map' clause}}
   foo();
+#pragma omp target private(j) map(j) // expected-error {{private variable cannot be in a map clause in '#pragma omp target' directive}}  expected-note {{defined as private}}
+  {}
+#pragma omp target firstprivate(j) map(j)  // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target' directive}} expected-note {{defined as firstprivate}}
+  {}
   return tmain<int, 3>(argc)+tmain<from, 4>(argc); // expected-note {{in instantiation of function template specialization 'tmain<int, 3>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<int, 4>' requested here}}
 }
 #endif
diff --git a/clang/test/OpenMP/target_private_messages.cpp b/clang/test/OpenMP/target_private_messages.cpp
index 929e35d..a093a87 100644
--- a/clang/test/OpenMP/target_private_messages.cpp
+++ b/clang/test/OpenMP/target_private_messages.cpp
@@ -189,6 +189,8 @@ int main(int argc, char **argv) {
   static int si;
 #pragma omp target private(si) // OK
   {}
+#pragma omp target map(i) private(i) // expected-error {{private variable cannot be in a map clause in '#pragma omp target' directive}}
+  {}
   s6 = s6_0; // expected-note {{in instantiation of member function 'S6<float>::operator=' requested here}}
   s7 = s7_0; // expected-note {{in instantiation of member function 'S7<S6<float> >::operator=' requested here}}
   return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
-- 
2.7.4