From 1462e63f67a965defec035c3bc17a5ddeb366964 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 7 Mar 2022 13:11:12 -0800 Subject: [PATCH] [OPENMP]PR53344: Emit code for final update of the inscan reduction vars in worksharing loops. The final update of the inscan reduction variables must be emitted. For worksharing loops, the reduction values are stored in a temporary array, so the last element must be copied to the original variable at the end of the construct. Differential Revision: https://reviews.llvm.org/D121156 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 97 ++++++++++++++++++++----- clang/test/OpenMP/parallel_for_scan_codegen.cpp | 7 ++ 2 files changed, 84 insertions(+), 20 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4d8b0af..d6f81b3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3515,6 +3515,57 @@ static void emitScanBasedDirectiveDecls( } } +/// Copies the final inscan reduction values to the original variables.
+/// The code is the following:
+/// \code
+/// <orig_var> = buffer[num_iters-1];
+/// \endcode
+static void emitScanBasedDirectiveFinals(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
+  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+  SmallVector<const Expr *, 4> Shareds;        // Original variables.
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayElems; // Buffer element accesses.
+  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+           "Only inscan reductions are expected.");
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  // Copy the last element of each temp buffer back to its original variable:
+  // <orig_var> = TMP[num_iters - 1];
+  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
+      OMPScanNumIterations,
+      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
+  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+    const Expr *PrivateExpr = Privates[I];
+    const Expr *OrigExpr = Shareds[I];
+    const Expr *CopyArrayElem = CopyArrayElems[I];
+    CodeGenFunction::OpaqueValueMapping IdxMapping(
+        CGF,
+        cast<OpaqueValueExpr>(
+            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+        RValue::get(OMPLast)); // Bind the subscript index to num_iters - 1.
+    LValue DestLVal = CGF.EmitLValue(OrigExpr);
+    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
+    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
+                    SrcLVal.getAddress(CGF),
+                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                    CopyOps[I]);
+  }
+}
+
 /// Emits the code for the directive with inscan reductions.
/// The code is the following: /// \code @@ -3709,6 +3760,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF, if (!isOpenMPParallelDirective(S.getDirectiveKind())) emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); + if (!isOpenMPParallelDirective(S.getDirectiveKind())) + emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); } else { CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), HasCancel); @@ -4282,23 +4335,25 @@ void CodeGenFunction::EmitOMPParallelForDirective( (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; { - if (llvm::any_of(S.getClausesOfKind(), + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + bool IsInscan = llvm::any_of(S.getClausesOfKind(), [](const OMPReductionClause *C) { return C->getModifier() == OMPC_REDUCTION_inscan; - })) { - const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); - CGCapturedStmtInfo CGSI(CR_OpenMP); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); - OMPLoopScope LoopScope(CGF, S); - return CGF.EmitScalarExpr(S.getNumIterations()); - }; + }); + if (IsInscan) emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); - } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, emitEmptyBoundParameters); + if (IsInscan) + emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); } // Check for outer lastprivate conditional update. 
checkForLastprivateConditionalUpdate(*this, S); @@ -4313,23 +4368,25 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { - if (llvm::any_of(S.getClausesOfKind(), + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + bool IsInscan = llvm::any_of(S.getClausesOfKind(), [](const OMPReductionClause *C) { return C->getModifier() == OMPC_REDUCTION_inscan; - })) { - const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); - CGCapturedStmtInfo CGSI(CR_OpenMP); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); - OMPLoopScope LoopScope(CGF, S); - return CGF.EmitScalarExpr(S.getNumIterations()); - }; + }); + if (IsInscan) emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); - } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, emitEmptyBoundParameters); + if (IsInscan) + emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); } // Check for outer lastprivate conditional update. checkForLastprivateConditionalUpdate(*this, S); diff --git a/clang/test/OpenMP/parallel_for_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_scan_codegen.cpp index 975d7c3..21d4a44 100644 --- a/clang/test/OpenMP/parallel_for_scan_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_scan_codegen.cpp @@ -27,6 +27,13 @@ void baz(int n) { // CHECK: [[B_BUF:%.+]] = alloca double, i64 10, // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call( + // CHECK: [[LAST:%.+]] = mul nsw i64 9, % + // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]] + // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false) + // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9 + // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]], + // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b, // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]] -- 2.7.4