[OPENMP]PR53344: Emit code for final update of the inscan reduction vars in workshari...
authorAlexey Bataev <a.bataev@outlook.com>
Mon, 7 Mar 2022 21:11:12 +0000 (13:11 -0800)
committerAlexey Bataev <a.bataev@outlook.com>
Thu, 28 Apr 2022 17:41:28 +0000 (10:41 -0700)
The final update of the inscan reduction variables needs to be emitted.
For worksharing loops, the reduction values are stored in a temporary
array, so the last element of that array must be copied to the original
variable at the end of the construct.

Differential Revision: https://reviews.llvm.org/D121156

clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/parallel_for_scan_codegen.cpp

index 4d8b0af..d6f81b3 100644 (file)
@@ -3515,6 +3515,57 @@ static void emitScanBasedDirectiveDecls(
   }
 }
 
+/// Copies the final inscan reduction values to the original variables.
+/// For each inscan reduction variable the emitted code is the following:
+/// \code
+/// <orig_var> = buffer[num_iters-1];
+/// \endcode
+static void emitScanBasedDirectiveFinals(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
+  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayElems;
+  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+           "Only inscan reductions are expected.");
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  // Copy the last element of the temp buffer to the original variable:
+  // <orig_var> = buffer[num_iters - 1];
+  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
+      OMPScanNumIterations,
+      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
+  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+    const Expr *PrivateExpr = Privates[I];
+    const Expr *OrigExpr = Shareds[I];
+    const Expr *CopyArrayElem = CopyArrayElems[I];
+    CodeGenFunction::OpaqueValueMapping IdxMapping(
+        CGF,
+        cast<OpaqueValueExpr>(
+            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+        RValue::get(OMPLast));
+    LValue DestLVal = CGF.EmitLValue(OrigExpr);
+    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
+    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
+                    SrcLVal.getAddress(CGF),
+                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                    CopyOps[I]);
+  }
+}
+
 /// Emits the code for the directive with inscan reductions.
 /// The code is the following:
 /// \code
@@ -3709,6 +3760,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
   } else {
     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                      HasCancel);
@@ -4282,23 +4335,25 @@ void CodeGenFunction::EmitOMPParallelForDirective(
     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
@@ -4313,23 +4368,25 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
index 975d7c3..21d4a44 100644 (file)
@@ -27,6 +27,13 @@ void baz(int n) {
   // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
 
   // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+  // CHECK: [[LAST:%.+]] = mul nsw i64 9, %
+  // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
+  // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
+  // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
+  // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
+  // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
 
   // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]