[OpenMP][OMPIRBuilder] Add generation of SIMD align assumptions to OMPIRBuilder
authorDominik Adamski <dominik.adamski@amd.com>
Thu, 8 Sep 2022 11:39:18 +0000 (06:39 -0500)
committerDominik Adamski <dominik.adamski@amd.com>
Tue, 18 Oct 2022 07:04:18 +0000 (02:04 -0500)
Currently generation of align assumptions for OpenMP simd construct is done
outside OMPIRBuilder for C code and it is not supported for Fortran.

According to OpenMP 5.0 standard (2.9.3) only pointers and arrays can be
aligned for C code.

If given aligned variable is pointer, then Clang generates the following set
of the LLVM IR isntructions to support simd align clause:

; memory allocation for pointer address:
%A.addr = alloca ptr, align 8
; some LLVM IR code
; Alignment instructions (alignment is equal to 32):
%0 = load ptr, ptr %A.addr, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 32) ]

If given aligned variable is array, then Clang generates the following set
of the LLVM IR isntructions to support simd align clause:

; memory allocation for array:
%B = alloca [10 x i32], align 16
; some LLVM IR code
; Alignment instructions (alignment is equal to 32):
%arraydecay = getelementptr inbounds [10 x i32], ptr %B, i64 0, i64 0
call void @llvm.assume(i1 true) [ "align"(ptr %arraydecay, i64 32) ]

OMPIRBuilder was modified to generate aligned assumptions. It generates only
llvm.assume calls. Frontend is responsible for generation of aligned pointer
and getting the default alignment value if user does not specify it in aligned
clause.

Unit and regression tests were added to check if aligned clause was handled correctly.

Differential Revision: https://reviews.llvm.org/D133578

Reviewed By: jdoerfert

clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/irbuilder_simd_aligned.cpp [new file with mode: 0644]
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

index d27e2c3..40d84d7 100644 (file)
@@ -2602,7 +2602,7 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
   for (OMPClause *C : S.clauses()) {
     // Currently only order, simdlen and safelen clauses are supported
     if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
-          isa<OMPOrderClause>(C)))
+          isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
       return false;
   }
 
@@ -2628,6 +2628,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
   }
   return true;
 }
+static llvm::MapVector<llvm::Value *, llvm::Value *>
+GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
+  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
+    llvm::APInt ClauseAlignment(64, 0);
+    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+      auto *AlignmentCI =
+          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
+      ClauseAlignment = AlignmentCI->getValue();
+    }
+    for (const Expr *E : Clause->varlists()) {
+      llvm::APInt Alignment(ClauseAlignment);
+      if (Alignment == 0) {
+        // OpenMP [2.8.1, Description]
+        // If no optional parameter is specified, implementation-defined default
+        // alignments for SIMD instructions on the target platforms are assumed.
+        Alignment =
+            CGF.getContext()
+                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+                    E->getType()->getPointeeType()))
+                .getQuantity();
+      }
+      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
+             "alignment is not power of 2");
+      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
+    }
+  }
+  return AlignedVars;
+}
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
   bool UseOMPIRBuilder =
@@ -2637,6 +2667,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
                                                           PrePostActionTy &) {
       // Use the OpenMPIRBuilder if enabled.
       if (UseOMPIRBuilder) {
+        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
+            GetAlignedMapping(S, CGF);
         // Emit the associated statement and get its loop representation.
         const Stmt *Inner = S.getRawStmt();
         llvm::CanonicalLoopInfo *CLI =
@@ -2669,7 +2701,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
         }
         // Add simd metadata to the collapsed loop. Do not generate
         // another loop for if clause. Support for if clause is done earlier.
-        OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen);
+        OMPBuilder.applySimd(CLI, AlignedVars,
+                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
         return;
       }
     };
diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp
new file mode 100644 (file)
index 0000000..6af2f73
--- /dev/null
@@ -0,0 +1,180 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+  int a, b;
+};
+
+struct P {
+  int a, b;
+};
+
+//
+#define N 32
+
+// CHECK-LABEL: @_Z6simplePfS_Pi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+// CHECK-NEXT:    [[P:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT:    [[D:%.*]] = alloca [32 x i32], align 16
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4
+// CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[A:%.*]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B:%.*]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i32* [[C:%.*]], i32** [[C_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32
+// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK:       for.body:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]]
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK:       for.inc:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK:       for.end:
+// CHECK-NEXT:    [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0
+// CHECK-NEXT:    store i32 3, i32* [[I1]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[I1]], i32** [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I1]], align 4
+// CHECK-NEXT:    store i32 [[TMP8]], i32* [[TMP7]], align 4
+// CHECK-NEXT:    call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 128) ]
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(%struct.S* [[TMP5]], i64 64) ]
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ]
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP12]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float
+// CHECK-NEXT:    [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float
+// CHECK-NEXT:    [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]]
+// CHECK-NEXT:    store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    store i32 3, i32* [[J]], align 4
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[J]], i32** [[TMP19]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[J]], align 4
+// CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP20]], align 4
+// CHECK-NEXT:    call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]])
+// CHECK-NEXT:    [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER19:%.*]]
+// CHECK:       omp_loop.preheader19:
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20:%.*]]
+// CHECK:       omp_loop.header20:
+// CHECK-NEXT:    [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND21:%.*]]
+// CHECK:       omp_loop.cond21:
+// CHECK-NEXT:    [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]]
+// CHECK:       omp_loop.body22:
+// CHECK-NEXT:    call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK-NEXT:    [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64
+// CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]]
+// CHECK-NEXT:    store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC23]]
+// CHECK:       omp_loop.inc23:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK:       omp_loop.exit24:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER25:%.*]]
+// CHECK:       omp_loop.after25:
+// CHECK-NEXT:    ret void
+//
+void simple(float *a, float *b, int *c) {
+  S s, *p;
+  int D[N];
+  for (int i = 0; i <N; ++i)
+    D[i] = i;
+  P pp;
+#pragma omp simd aligned (a:128) aligned(p:64) aligned(D)
+  for (int i = 3; i < N; i += 5) {
+    a[i] = b[i] + s.a + p->a + D[i];
+  }
+
+#pragma omp simd
+  for (int j = 3; j < N; j += 5) {
+    c[j] = pp.a;
+  }
+}
+//.
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
+//.
+// CHECK: !0 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !1 = !{i32 7, !"openmp", i32 50}
+// CHECK: !3 = distinct !{!3, !4}
+// CHECK: !4 = !{!"llvm.loop.mustprogress"}
+// CHECK: !5 = distinct !{}
+// CHECK: !6 = distinct !{!6, !7, !8}
+// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5}
+// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK: !9 = distinct !{}
+// CHECK: !10 = distinct !{!10, !11, !8}
+// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9}
+//.
index 87f504e..c16230f 100644 (file)
@@ -617,13 +617,18 @@ public:
   /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
   /// to false.
   ///
-  /// \param Loop    The loop to simd-ize.
-  /// \param IfCond  The value which corresponds to the if clause condition.
-  /// \param Order   The enum to map order clause
-  /// \param Simdlen The Simdlen length to apply to the simd loop.
-  /// \param Safelen The Safelen length to apply to the simd loop.
-  void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order,
-                 ConstantInt *Simdlen, ConstantInt *Safelen);
+  /// \param Loop        The loop to simd-ize.
+  /// \param AlignedVars The map which containts pairs of the pointer
+  ///                    and its corresponding alignment.
+  /// \param IfCond      The value which corresponds to the if clause
+  ///                    condition.
+  /// \param Order       The enum to map order clause.
+  /// \param Simdlen     The Simdlen length to apply to the simd loop.
+  /// \param Safelen     The Safelen length to apply to the simd loop.
+  void applySimd(CanonicalLoopInfo *Loop,
+                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
+                 omp::OrderKind Order, ConstantInt *Simdlen,
+                 ConstantInt *Safelen);
 
   /// Generator for '#omp flush'
   ///
index 57370fc..adc5316 100644 (file)
@@ -3005,9 +3005,10 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
   Builder.CreateBr(NewBlocks.front());
 }
 
-void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond,
-                                OrderKind Order, ConstantInt *Simdlen,
-                                ConstantInt *Safelen) {
+void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
+                                MapVector<Value *, Value *> AlignedVars,
+                                Value *IfCond, OrderKind Order,
+                                ConstantInt *Simdlen, ConstantInt *Safelen) {
   LLVMContext &Ctx = Builder.getContext();
 
   Function *F = CanonicalLoop->getFunction();
@@ -3025,6 +3026,17 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond,
   LoopInfo &&LI = LIA.run(*F, FAM);
 
   Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
+  if (AlignedVars.size()) {
+    InsertPointTy IP = Builder.saveIP();
+    Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
+    for (auto &AlignedItem : AlignedVars) {
+      Value *AlignedPtr = AlignedItem.first;
+      Value *Alignment = AlignedItem.second;
+      Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(),
+                                        AlignedPtr, Alignment);
+    }
+    Builder.restoreIP(IP);
+  }
 
   if (IfCond) {
     ValueToValueMapTy VMap;
index 1dccdb0..af96ac2 100644 (file)
@@ -1767,11 +1767,12 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
   OpenMPIRBuilder OMPBuilder(*M);
-
+  MapVector<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                        /* Simdlen */ nullptr,
                        /* Safelen */ nullptr);
 
@@ -1798,13 +1799,76 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) {
   }));
 }
 
-TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
   OpenMPIRBuilder OMPBuilder(*M);
+  IRBuilder<> Builder(BB);
+  const int AlignmentValue = 32;
+  AllocaInst *Alloc1 =
+      Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1));
+  LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
+  MapVector<Value *, Value *> AlignedVars;
+  AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
+
+  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
+
+  // Simd-ize the loop.
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
+                       /* Simdlen */ nullptr,
+                       /* Safelen */ nullptr);
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  PassBuilder PB;
+  FunctionAnalysisManager FAM;
+  PB.registerFunctionAnalyses(FAM);
+  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
+
+  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
+  EXPECT_EQ(TopLvl.size(), 1u);
+
+  Loop *L = TopLvl.front();
+  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
+  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
+
+  // Check for llvm.access.group metadata attached to the printf
+  // function in the loop body.
+  BasicBlock *LoopBody = CLI->getBody();
+  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
+    return I.getMetadata("llvm.access.group") != nullptr;
+  }));
 
+  // Check if number of assumption instructions is equal to number of aligned
+  // variables
+  BasicBlock *LoopPreheader = CLI->getPreheader();
+  size_t NumAssummptionCallsInPreheader = count_if(
+      *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
+  EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
+
+  // Check if variables are correctly aligned
+  for (Instruction &Instr : *LoopPreheader) {
+    if (!isa<AssumeInst>(Instr))
+      continue;
+    AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
+    if (AssumeInstruction->getNumTotalBundleOperands()) {
+      auto Bundle = AssumeInstruction->getOperandBundleAt(0);
+      if (Bundle.getTagName() == "align") {
+        EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
+        auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
+        EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
+      }
+    }
+  }
+}
+TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+  OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars,
+                       /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                        /* Safelen */ nullptr);
 
@@ -1834,12 +1898,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
   OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop.
   OMPBuilder.applySimd(
-      CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
   OMPBuilder.finalize();
@@ -1870,13 +1935,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
-                       /* Simdlen */ nullptr,
-                       ConstantInt::get(Type::getInt32Ty(Ctx), 3));
+  OMPBuilder.applySimd(
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+      /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -1904,11 +1969,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
 
@@ -1939,6 +2005,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
 TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
   OpenMPIRBuilder OMPBuilder(*M);
   IRBuilder<> Builder(BB);
+  MapVector<Value *, Value *> AlignedVars;
   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
 
@@ -1953,7 +2020,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
 
   // Simd-ize the loop with if condition
-  OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                        /* Safelen */ nullptr);
 
index bd15fc4..5fa1593 100644 (file)
@@ -996,8 +996,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   if (llvm::Optional<uint64_t> safelenVar = loop.getSafelen())
     safelen = builder.getInt64(safelenVar.value());
 
+  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
   ompBuilder->applySimd(
-      loopInfo,
+      loopInfo, alignedVars,
       loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
                        : nullptr,
       llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);