[OPENMP] Emit sizes/init ptrs etc. data for task reductions before
authorAlexey Bataev <a.bataev@hotmail.com>
Thu, 8 Mar 2018 15:24:08 +0000 (15:24 +0000)
committerAlexey Bataev <a.bataev@hotmail.com>
Thu, 8 Mar 2018 15:24:08 +0000 (15:24 +0000)
using.

We may emit the code in wrong order because of incorrect implementation
of the runtime functions for task reductions. Threadprivate storages may
be initialized after real initialization of the reduction items. Patch
fixes this problem.

llvm-svn: 327008

clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/task_in_reduction_codegen.cpp
clang/test/OpenMP/taskloop_reduction_codegen.cpp

index 8d1f9c6..957bcc4 100644 (file)
@@ -2886,6 +2886,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
         RedCG.emitSharedLValue(CGF, Cnt);
         RedCG.emitAggregateType(CGF, Cnt);
+        // FIXME: This must removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initilizer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+                                                           RedCG, Cnt);
         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
         Replacement =
@@ -2898,11 +2903,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                          [Replacement]() { return Replacement; });
-        // FIXME: This must removed once the runtime library is fixed.
-        // Emit required threadprivate variables for
-        // initilizer/combiner/finalizer.
-        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
-                                                           RedCG, Cnt);
       }
     }
     // Privatize all private variables except for in_reduction items.
@@ -2935,6 +2935,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         RedCG.emitAggregateType(CGF, Cnt);
         // The taskgroup descriptor variable is always implicit firstprivate and
         // privatized already during procoessing of the firstprivates.
+        // FIXME: This must removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initilizer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+                                                           RedCG, Cnt);
         llvm::Value *ReductionsPtr =
             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                  TaskgroupDescriptors[Cnt]->getExprLoc());
@@ -2949,11 +2954,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                               [Replacement]() { return Replacement; });
-        // FIXME: This must removed once the runtime library is fixed.
-        // Emit required threadprivate variables for
-        // initilizer/combiner/finalizer.
-        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
-                                                           RedCG, Cnt);
       }
     }
     (void)InRedScope.Privatize();
index 30ba9a1..3944857 100644 (file)
@@ -78,9 +78,11 @@ int main(int argc, char **argv) {
 // CHECK-NEXT:  call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
 // CHECK:       [[D_REF:%.+]] = getelementptr inbounds %
 // CHECK-NEXT:  [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
+// CHECK:       call i8* @__kmpc_threadprivate_cached(
 // CHECK:       [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
 // CHECK-NEXT:  [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
 // CHECK-NEXT:  call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
 // CHECK:       add nsw i32
 // CHECK:       store i32 %
+// CHECK-NOT:   call i8* @__kmpc_threadprivate_cached(
 #endif
index 8abd017..2623d93 100644 (file)
@@ -199,6 +199,17 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+
 // CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"