[OPENMP] Captured arguments of the capturable clauses by value.
authorAlexey Bataev <a.bataev@hotmail.com>
Fri, 22 Dec 2017 21:01:52 +0000 (21:01 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Fri, 22 Dec 2017 21:01:52 +0000 (21:01 +0000)
If the clause is applied to the combined construct and has captured
expression, try to capture this expression by value rather than by
reference.

llvm-svn: 321386

clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/parallel_for_codegen.cpp
clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp

index b34bb33..0880b2d 100644 (file)
@@ -1290,9 +1290,14 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {
   }
 
   if (IsByRef && Ty.getNonReferenceType()->isScalarType()) {
-    IsByRef = !DSAStack->hasExplicitDSA(
-        D, [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
-        Level, /*NotLastprivate=*/true);
+    IsByRef =
+        !DSAStack->hasExplicitDSA(
+            D,
+            [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
+            Level, /*NotLastprivate=*/true) &&
+        // If the variable is artificial and must be captured by value - try to
+        // capture by value.
+        !(isa<OMPCapturedExprDecl>(D) && D->hasAttr<OMPCaptureKindAttr>());
   }
 
   // When passing data by copy, we need to make sure it fits the uintptr size
@@ -2321,10 +2326,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,
   ASTContext &C = S.getASTContext();
   Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts();
   QualType Ty = Init->getType();
+  Attr *OMPCaptureKind = nullptr;
   if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) {
-    if (S.getLangOpts().CPlusPlus)
+    if (S.getLangOpts().CPlusPlus) {
       Ty = C.getLValueReferenceType(Ty);
-    else {
+    else {
       Ty = C.getPointerType(Ty);
       ExprResult Res =
           S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_AddrOf, Init);
@@ -2333,11 +2339,16 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,
       Init = Res.get();
     }
     WithInit = true;
+  } else if (AsExpression) {
+    // This variable must be captured by value.
+    OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown);
   }
   auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty,
                                           CaptureExpr->getLocStart());
   if (!WithInit)
     CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange()));
+  if (OMPCaptureKind)
+    CED->addAttr(OMPCaptureKind);
   S.CurContext->addHiddenDecl(CED);
   S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false);
   return CED;
@@ -2346,31 +2357,34 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,
 static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
                                  bool WithInit) {
   OMPCapturedExprDecl *CD;
-  if (auto *VD = S.IsOpenMPCapturedDecl(D))
+  if (auto *VD = S.IsOpenMPCapturedDecl(D)) {
     CD = cast<OMPCapturedExprDecl>(VD);
-  else
+  } else {
     CD = buildCaptureDecl(S, D->getIdentifier(), CaptureExpr, WithInit,
                           /*AsExpression=*/false);
+  }
   return buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
                           CaptureExpr->getExprLoc());
 }
 
 static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) {
+  CaptureExpr = S.DefaultLvalueConversion(CaptureExpr).get();
   if (!Ref) {
-    auto *CD =
-        buildCaptureDecl(S, &S.getASTContext().Idents.get(".capture_expr."),
-                         CaptureExpr, /*WithInit=*/true, /*AsExpression=*/true);
+    OMPCapturedExprDecl *CD = buildCaptureDecl(
+        S, &S.getASTContext().Idents.get(".capture_expr."), CaptureExpr,
+        /*WithInit=*/true, /*AsExpression=*/true);
     Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
                            CaptureExpr->getExprLoc());
   }
   ExprResult Res = Ref;
   if (!S.getLangOpts().CPlusPlus &&
       CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue() &&
-      Ref->getType()->isPointerType())
+      Ref->getType()->isPointerType()) {
     Res = S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_Deref, Ref);
-  if (!Res.isUsable())
-    return ExprError();
-  return CaptureExpr->isGLValue() ? Res : S.DefaultLvalueConversion(Res.get());
+    if (!Res.isUsable())
+      return ExprError();
+  }
+  return S.DefaultLvalueConversion(Res.get());
 }
 
 namespace {
@@ -8117,12 +8131,13 @@ OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier,
     if (Val.isInvalid())
       return nullptr;
 
-    ValExpr = MakeFullExpr(Val.get()).get();
+    ValExpr = Val.get();
 
     OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
     CaptureRegion =
         getOpenMPCaptureRegionForClause(DKind, OMPC_if, NameModifier);
     if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+      ValExpr = MakeFullExpr(ValExpr).get();
       llvm::MapVector<Expr *, DeclRefExpr *> Captures;
       ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
       HelperValStmt = buildPreInits(Context, Captures);
@@ -8239,6 +8254,7 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads,
   OpenMPDirectiveKind CaptureRegion =
       getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads);
   if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -8666,6 +8682,7 @@ OMPClause *Sema::ActOnOpenMPScheduleClause(
                      DSAStack->getCurrentDirective(), OMPC_schedule) !=
                      OMPD_unknown &&
                  !CurContext->isDependentContext()) {
+        ValExpr = MakeFullExpr(ValExpr).get();
         llvm::MapVector<Expr *, DeclRefExpr *> Captures;
         ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
         HelperValStmt = buildPreInits(Context, Captures);
@@ -11355,6 +11372,7 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc,
   OpenMPDirectiveKind CaptureRegion =
       getOpenMPCaptureRegionForClause(DKind, OMPC_device);
   if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -12378,6 +12396,7 @@ OMPClause *Sema::ActOnOpenMPNumTeamsClause(Expr *NumTeams,
   OpenMPDirectiveKind CaptureRegion =
       getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams);
   if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -12404,6 +12423,7 @@ OMPClause *Sema::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit,
   OpenMPDirectiveKind CaptureRegion =
       getOpenMPCaptureRegionForClause(DKind, OMPC_thread_limit);
   if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) {
+    ValExpr = MakeFullExpr(ValExpr).get();
     llvm::MapVector<Expr *, DeclRefExpr *> Captures;
     ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
     HelperValStmt = buildPreInits(Context, Captures);
@@ -12514,6 +12534,7 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause(
                      DSAStack->getCurrentDirective(), OMPC_dist_schedule) !=
                      OMPD_unknown &&
                  !CurContext->isDependentContext()) {
+        ValExpr = MakeFullExpr(ValExpr).get();
         llvm::MapVector<Expr *, DeclRefExpr *> Captures;
         ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
         HelperValStmt = buildPreInits(Context, Captures);
index 1773619..db3a2de 100644 (file)
@@ -15,10 +15,12 @@ void with_var_schedule() {
   double a = 5;
 // CHECK: [[CHUNK_SIZE:%.+]] = fptosi double %{{.+}}to i8
 // CHECK: store i8 %{{.+}}, i8* [[CHUNK:%.+]],
-// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i8* [[CHUNK]])
+// CHECK: [[VAL:%.+]] = load i8, i8* [[CHUNK]],
+// CHECK: store i8 [[VAL]], i8*
+// CHECK: [[CHUNK:%.+]] = load i64, i64* %
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i64 [[CHUNK]])
 
-// CHECK: [[CHUNK:%.+]] = load i8*, i8** %
-// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]],
+// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* %
 // CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64
 // CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]])
index 7c0ac46..304cc8b 100644 (file)
@@ -60,8 +60,8 @@ int main() {
 #pragma omp teams distribute parallel for num_threads(a)
   for (int i = 0; i < 100; i++) {
     // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
-    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*,
-    // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]],
+    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64,
+    // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8*
     // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
     // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
     // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
@@ -110,9 +110,9 @@ int main() {
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
 
 // CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]])
-// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*,
+// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64,
 // CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]],
-// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]],
+// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8*
 // CHECK-DAG:   [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]],
 // CHECK-DAG:   [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}}
 // CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]])
index 93051af..2576241 100644 (file)
@@ -60,8 +60,8 @@ int main() {
 #pragma omp teams distribute parallel for simd num_threads(a)
   for (int i = 0; i < 100; i++) {
     // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
-    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*,
-    // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]],
+    // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64,
+    // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8*
     // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
     // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
     // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
@@ -110,9 +110,9 @@ int main() {
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
 
 // CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]])
-// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*,
+// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64,
 // CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]],
-// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]],
+// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8*
 // CHECK-DAG:   [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]],
 // CHECK-DAG:   [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}}
 // CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]])