From ccd314d3209a192fc17ad621cf7fe3f09f7c7b9f Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 8 Sep 2022 06:39:18 -0500 Subject: [PATCH] [OpenMP][OMPIRBuilder] Add generation of SIMD align assumptions to OMPIRBuilder Currently generation of align assumptions for OpenMP simd construct is done outside OMPIRBuilder for C code and it is not supported for Fortran. According to OpenMP 5.0 standard (2.9.3) only pointers and arrays can be aligned for C code. If given aligned variable is pointer, then Clang generates the following set of the LLVM IR isntructions to support simd align clause: ; memory allocation for pointer address: %A.addr = alloca ptr, align 8 ; some LLVM IR code ; Alignment instructions (alignment is equal to 32): %0 = load ptr, ptr %A.addr, align 8 call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 32) ] If given aligned variable is array, then Clang generates the following set of the LLVM IR isntructions to support simd align clause: ; memory allocation for array: %B = alloca [10 x i32], align 16 ; some LLVM IR code ; Alignment instructions (alignment is equal to 32): %arraydecay = getelementptr inbounds [10 x i32], ptr %B, i64 0, i64 0 call void @llvm.assume(i1 true) [ "align"(ptr %arraydecay, i64 32) ] OMPIRBuilder was modified to generate aligned assumptions. It generates only llvm.assume calls. Frontend is responsible for generation of aligned pointer and getting the default alignment value if user does not specify it in aligned clause. Unit and regression tests were added to check if aligned clause was handled correctly. Differential Revision: https://reviews.llvm.org/D133578 Reviewed By: jdoerfert --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 37 ++++- clang/test/OpenMP/irbuilder_simd_aligned.cpp | 180 +++++++++++++++++++++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 19 ++- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 18 ++- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 91 +++++++++-- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 +- 6 files changed, 323 insertions(+), 25 deletions(-) create mode 100644 clang/test/OpenMP/irbuilder_simd_aligned.cpp diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d27e2c3..40d84d7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2602,7 +2602,7 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { for (OMPClause *C : S.clauses()) { // Currently only order, simdlen and safelen clauses are supported if (!(isa(C) || isa(C) || - isa(C))) + isa(C) || isa(C))) return false; } @@ -2628,6 +2628,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { } return true; } +static llvm::MapVector +GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { + llvm::MapVector AlignedVars; + for (const auto *Clause : S.getClausesOfKind()) { + llvm::APInt ClauseAlignment(64, 0); + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = + cast(CGF.EmitScalarExpr(AlignmentExpr)); + ClauseAlignment = AlignmentCI->getValue(); + } + for (const Expr *E : Clause->varlists()) { + llvm::APInt Alignment(ClauseAlignment); + if (Alignment == 0) { + // OpenMP [2.8.1, Description] + // If no optional parameter is specified, implementation-defined default + // alignments for SIMD instructions on the target platforms are assumed. + Alignment = + CGF.getContext() + .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( + E->getType()->getPointeeType())) + .getQuantity(); + } + assert((Alignment == 0 || Alignment.isPowerOf2()) && + "alignment is not power of 2"); + llvm::Value *PtrValue = CGF.EmitScalarExpr(E); + AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); + } + } + return AlignedVars; +} void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { bool UseOMPIRBuilder = @@ -2637,6 +2667,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled. if (UseOMPIRBuilder) { + llvm::MapVector AlignedVars = + GetAlignedMapping(S, CGF); // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = @@ -2669,7 +2701,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { } // Add simd metadata to the collapsed loop. Do not generate // another loop for if clause. Support for if clause is done earlier. - OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen); + OMPBuilder.applySimd(CLI, AlignedVars, + /*IfCond*/ nullptr, Order, Simdlen, Safelen); return; } }; diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp new file mode 100644 index 0000000..6af2f738 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -0,0 +1,180 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int a, b; +}; + +struct P { + int a, b; +}; + +// +#define N 32 + +// CHECK-LABEL: @_Z6simplePfS_Pi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK-NEXT: [[P:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[D:%.*]] = alloca [32 x i32], align 16 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A:%.*]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B:%.*]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store i32* [[C:%.*]], i32** [[C_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0 +// CHECK-NEXT: store i32 3, i32* [[I1]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I1]], i32** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[I1]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], i32* [[TMP7]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 128) ] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(%struct.S* [[TMP5]], i64 64) ] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK-NEXT: [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]] +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float +// CHECK-NEXT: [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float +// CHECK-NEXT: [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]] +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]] +// CHECK-NEXT: store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: store i32 3, i32* [[J]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[J]], i32** [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK-NEXT: call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]]) +// CHECK-NEXT: [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER19:%.*]] +// CHECK: omp_loop.preheader19: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20:%.*]] +// CHECK: omp_loop.header20: +// CHECK-NEXT: [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND21:%.*]] +// CHECK: omp_loop.cond21: +// CHECK-NEXT: [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]] +// CHECK: omp_loop.body22: +// CHECK-NEXT: call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK-NEXT: [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: br label [[OMP_LOOP_INC23]] +// CHECK: omp_loop.inc23: +// CHECK-NEXT: [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK: omp_loop.exit24: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER25:%.*]] +// CHECK: omp_loop.after25: +// CHECK-NEXT: ret void +// +void simple(float *a, float *b, int *c) { + S s, *p; + int D[N]; + for (int i = 0; i a + D[i]; + } + +#pragma omp simd + for (int j = 3; j < N; j += 5) { + c[j] = pp.a; + } +} +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !3 = distinct !{!3, !4} +// CHECK: !4 = !{!"llvm.loop.mustprogress"} +// CHECK: !5 = distinct !{} +// CHECK: !6 = distinct !{!6, !7, !8} +// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5} +// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !9 = distinct !{} +// CHECK: !10 = distinct !{!10, !11, !8} +// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9} +//. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 87f504e..c16230f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -617,13 +617,18 @@ public: /// to the cloned loop. The cloned loop is executed when ifCond is evaluated /// to false. /// - /// \param Loop The loop to simd-ize. - /// \param IfCond The value which corresponds to the if clause condition. - /// \param Order The enum to map order clause - /// \param Simdlen The Simdlen length to apply to the simd loop. - /// \param Safelen The Safelen length to apply to the simd loop. - void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order, - ConstantInt *Simdlen, ConstantInt *Safelen); + /// \param Loop The loop to simd-ize. + /// \param AlignedVars The map which containts pairs of the pointer + /// and its corresponding alignment. + /// \param IfCond The value which corresponds to the if clause + /// condition. + /// \param Order The enum to map order clause. + /// \param Simdlen The Simdlen length to apply to the simd loop. + /// \param Safelen The Safelen length to apply to the simd loop. + void applySimd(CanonicalLoopInfo *Loop, + MapVector AlignedVars, Value *IfCond, + omp::OrderKind Order, ConstantInt *Simdlen, + ConstantInt *Safelen); /// Generator for '#omp flush' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 57370fc..adc5316 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3005,9 +3005,10 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop, Builder.CreateBr(NewBlocks.front()); } -void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, - OrderKind Order, ConstantInt *Simdlen, - ConstantInt *Safelen) { +void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, + MapVector AlignedVars, + Value *IfCond, OrderKind Order, + ConstantInt *Simdlen, ConstantInt *Safelen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); @@ -3025,6 +3026,17 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, LoopInfo &&LI = LIA.run(*F, FAM); Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + if (AlignedVars.size()) { + InsertPointTy IP = Builder.saveIP(); + Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator()); + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; + Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(), + AlignedPtr, Alignment); + } + Builder.restoreIP(IP); + } if (IfCond) { ValueToValueMapTy VMap; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 1dccdb0..af96ac2 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1767,11 +1767,12 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { TEST_F(OpenMPIRBuilderTest, ApplySimd) { OpenMPIRBuilder OMPBuilder(*M); - + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, /* Simdlen */ nullptr, /* Safelen */ nullptr); @@ -1798,13 +1799,76 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) { })); } -TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { +TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + const int AlignmentValue = 32; + AllocaInst *Alloc1 = + Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1)); + LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); + MapVector AlignedVars; + AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); + + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + + // Simd-ize the loop. + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, + /* Safelen */ nullptr); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); + // Check if number of assumption instructions is equal to number of aligned + // variables + BasicBlock *LoopPreheader = CLI->getPreheader(); + size_t NumAssummptionCallsInPreheader = count_if( + *LoopPreheader, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); + + // Check if variables are correctly aligned + for (Instruction &Instr : *LoopPreheader) { + if (!isa(Instr)) + continue; + AssumeInst *AssumeInstruction = cast(&Instr); + if (AssumeInstruction->getNumTotalBundleOperands()) { + auto Bundle = AssumeInstruction->getOperandBundleAt(0); + if (Bundle.getTagName() == "align") { + EXPECT_TRUE(isa(Bundle.Inputs[1])); + auto ConstIntVal = dyn_cast(Bundle.Inputs[1]); + EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); + } + } + } +} +TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { + OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); @@ -1834,12 +1898,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. OMPBuilder.applySimd( - CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); @@ -1870,13 +1935,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { TEST_F(OpenMPIRBuilderTest, ApplySafelen) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, - /* Simdlen */ nullptr, - ConstantInt::get(Type::getInt32Ty(Ctx), 3)); + OMPBuilder.applySimd( + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -1904,11 +1969,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) { TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 2), ConstantInt::get(Type::getInt32Ty(Ctx), 3)); @@ -1939,6 +2005,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { OpenMPIRBuilder OMPBuilder(*M); IRBuilder<> Builder(BB); + MapVector AlignedVars; AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); @@ -1953,7 +2020,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop with if condition - OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index bd15fc4..5fa1593 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -996,8 +996,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, if (llvm::Optional safelenVar = loop.getSafelen()) safelen = builder.getInt64(safelenVar.value()); + llvm::MapVector alignedVars; ompBuilder->applySimd( - loopInfo, + loopInfo, alignedVars, loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); -- 2.7.4