[OpenMP][OMPIRBuilder] Add generation of SIMD align assumptions to OMPIRBuilder

author Dominik Adamski <dominik.adamski@amd.com>

Thu, 8 Sep 2022 11:39:18 +0000 (06:39 -0500)

committer Dominik Adamski <dominik.adamski@amd.com>

Tue, 18 Oct 2022 07:04:18 +0000 (02:04 -0500)
author Dominik Adamski <dominik.adamski@amd.com>
Thu, 8 Sep 2022 11:39:18 +0000 (06:39 -0500)
committer Dominik Adamski <dominik.adamski@amd.com>
Tue, 18 Oct 2022 07:04:18 +0000 (02:04 -0500)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp

index d27e2c3..40d84d7 100644 (file)
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2602,7 +2602,7 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
    for (OMPClause *C : S.clauses()) {
      // Currently only order, simdlen and safelen clauses are supported
      if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
-          isa<OMPOrderClause>(C)))
+          isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
        return false;
    }
  
@@ -2628,6 +2628,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
    }
    return true;
  }
+static llvm::MapVector<llvm::Value *, llvm::Value *>
+GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
+  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
+    llvm::APInt ClauseAlignment(64, 0);
+    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+      auto *AlignmentCI =
+          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
+      ClauseAlignment = AlignmentCI->getValue();
+    }
+    for (const Expr *E : Clause->varlists()) {
+      llvm::APInt Alignment(ClauseAlignment);
+      if (Alignment == 0) {
+        // OpenMP [2.8.1, Description]
+        // If no optional parameter is specified, implementation-defined default
+        // alignments for SIMD instructions on the target platforms are assumed.
+        Alignment =
+            CGF.getContext()
+                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+                    E->getType()->getPointeeType()))
+                .getQuantity();
+      }
+      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
+             "alignment is not power of 2");
+      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
+    }
+  }
+  return AlignedVars;
+}
  
  void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
    bool UseOMPIRBuilder =
@@ -2637,6 +2667,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
                                                            PrePostActionTy &) {
        // Use the OpenMPIRBuilder if enabled.
        if (UseOMPIRBuilder) {
+        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
+            GetAlignedMapping(S, CGF);
          // Emit the associated statement and get its loop representation.
          const Stmt *Inner = S.getRawStmt();
          llvm::CanonicalLoopInfo *CLI =
@@ -2669,7 +2701,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
          }
          // Add simd metadata to the collapsed loop. Do not generate
          // another loop for if clause. Support for if clause is done earlier.
-        OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen);
+        OMPBuilder.applySimd(CLI, AlignedVars,
+                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
          return;
        }
      };
diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp

new file mode 100644 (file)

index 0000000..6af2f73
--- /dev/null
+++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp
@@ -0,0 +1,180 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+  int a, b;
+};
+
+struct P {
+  int a, b;
+};
+
+//
+#define N 32
+
+// CHECK-LABEL: @_Z6simplePfS_Pi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+// CHECK-NEXT:    [[P:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT:    [[D:%.*]] = alloca [32 x i32], align 16
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4
+// CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[A:%.*]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B:%.*]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i32* [[C:%.*]], i32** [[C_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32
+// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK:       for.body:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]]
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK:       for.inc:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK:       for.end:
+// CHECK-NEXT:    [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0
+// CHECK-NEXT:    store i32 3, i32* [[I1]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[I1]], i32** [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I1]], align 4
+// CHECK-NEXT:    store i32 [[TMP8]], i32* [[TMP7]], align 4
+// CHECK-NEXT:    call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 128) ]
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(%struct.S* [[TMP5]], i64 64) ]
+// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ]
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP12]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float
+// CHECK-NEXT:    [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float
+// CHECK-NEXT:    [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]]
+// CHECK-NEXT:    store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    store i32 3, i32* [[J]], align 4
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[J]], i32** [[TMP19]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[J]], align 4
+// CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP20]], align 4
+// CHECK-NEXT:    call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]])
+// CHECK-NEXT:    [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER19:%.*]]
+// CHECK:       omp_loop.preheader19:
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20:%.*]]
+// CHECK:       omp_loop.header20:
+// CHECK-NEXT:    [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND21:%.*]]
+// CHECK:       omp_loop.cond21:
+// CHECK-NEXT:    [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]]
+// CHECK:       omp_loop.body22:
+// CHECK-NEXT:    call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]]
+// CHECK-NEXT:    [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64
+// CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]]
+// CHECK-NEXT:    store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK-NEXT:    br label [[OMP_LOOP_INC23]]
+// CHECK:       omp_loop.inc23:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK:       omp_loop.exit24:
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER25:%.*]]
+// CHECK:       omp_loop.after25:
+// CHECK-NEXT:    ret void
+//
+void simple(float *a, float *b, int *c) {
+  S s, *p;
+  int D[N];
+  for (int i = 0; i <N; ++i)
+    D[i] = i;
+  P pp;
+#pragma omp simd aligned (a:128) aligned(p:64) aligned(D)
+  for (int i = 3; i < N; i += 5) {
+    a[i] = b[i] + s.a + p->a + D[i];
+  }
+
+#pragma omp simd
+  for (int j = 3; j < N; j += 5) {
+    c[j] = pp.a;
+  }
+}
+//.
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
+//.
+// CHECK: !0 = !{i32 1, !"wchar_size", i32 4}
+// CHECK: !1 = !{i32 7, !"openmp", i32 50}
+// CHECK: !3 = distinct !{!3, !4}
+// CHECK: !4 = !{!"llvm.loop.mustprogress"}
+// CHECK: !5 = distinct !{}
+// CHECK: !6 = distinct !{!6, !7, !8}
+// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5}
+// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true}
+// CHECK: !9 = distinct !{}
+// CHECK: !10 = distinct !{!10, !11, !8}
+// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9}
+//.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

index 87f504e..c16230f 100644 (file)
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -617,13 +617,18 @@ public:
    /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
    /// to false.
    ///
-  /// \param Loop    The loop to simd-ize.
-  /// \param IfCond  The value which corresponds to the if clause condition.
-  /// \param Order   The enum to map order clause
-  /// \param Simdlen The Simdlen length to apply to the simd loop.
-  /// \param Safelen The Safelen length to apply to the simd loop.
-  void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order,
-                 ConstantInt *Simdlen, ConstantInt *Safelen);
+  /// \param Loop        The loop to simd-ize.
+  /// \param AlignedVars The map which containts pairs of the pointer
+  ///                    and its corresponding alignment.
+  /// \param IfCond      The value which corresponds to the if clause
+  ///                    condition.
+  /// \param Order       The enum to map order clause.
+  /// \param Simdlen     The Simdlen length to apply to the simd loop.
+  /// \param Safelen     The Safelen length to apply to the simd loop.
+  void applySimd(CanonicalLoopInfo *Loop,
+                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
+                 omp::OrderKind Order, ConstantInt *Simdlen,
+                 ConstantInt *Safelen);
  
    /// Generator for '#omp flush'
    ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

index 57370fc..adc5316 100644 (file)
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3005,9 +3005,10 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
    Builder.CreateBr(NewBlocks.front());
  }
  
-void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond,
-                                OrderKind Order, ConstantInt *Simdlen,
-                                ConstantInt *Safelen) {
+void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
+                                MapVector<Value *, Value *> AlignedVars,
+                                Value *IfCond, OrderKind Order,
+                                ConstantInt *Simdlen, ConstantInt *Safelen) {
    LLVMContext &Ctx = Builder.getContext();
  
    Function *F = CanonicalLoop->getFunction();
@@ -3025,6 +3026,17 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond,
    LoopInfo &&LI = LIA.run(*F, FAM);
  
    Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
+  if (AlignedVars.size()) {
+    InsertPointTy IP = Builder.saveIP();
+    Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
+    for (auto &AlignedItem : AlignedVars) {
+      Value *AlignedPtr = AlignedItem.first;
+      Value *Alignment = AlignedItem.second;
+      Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(),
+                                        AlignedPtr, Alignment);
+    }
+    Builder.restoreIP(IP);
+  }
  
    if (IfCond) {
      ValueToValueMapTy VMap;
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

index 1dccdb0..af96ac2 100644 (file)
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1767,11 +1767,12 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
  
  TEST_F(OpenMPIRBuilderTest, ApplySimd) {
    OpenMPIRBuilder OMPBuilder(*M);
-
+  MapVector<Value *, Value *> AlignedVars;
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
    // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                         /* Simdlen */ nullptr,
                         /* Safelen */ nullptr);
  
@@ -1798,13 +1799,76 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) {
    }));
  }
  
-TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
    OpenMPIRBuilder OMPBuilder(*M);
+  IRBuilder<> Builder(BB);
+  const int AlignmentValue = 32;
+  AllocaInst *Alloc1 =
+      Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1));
+  LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
+  MapVector<Value *, Value *> AlignedVars;
+  AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
+
+  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
+
+  // Simd-ize the loop.
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
+                       /* Simdlen */ nullptr,
+                       /* Safelen */ nullptr);
+
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  PassBuilder PB;
+  FunctionAnalysisManager FAM;
+  PB.registerFunctionAnalyses(FAM);
+  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
+
+  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
+  EXPECT_EQ(TopLvl.size(), 1u);
+
+  Loop *L = TopLvl.front();
+  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
+  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
+
+  // Check for llvm.access.group metadata attached to the printf
+  // function in the loop body.
+  BasicBlock *LoopBody = CLI->getBody();
+  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
+    return I.getMetadata("llvm.access.group") != nullptr;
+  }));
  
+  // Check if number of assumption instructions is equal to number of aligned
+  // variables
+  BasicBlock *LoopPreheader = CLI->getPreheader();
+  size_t NumAssummptionCallsInPreheader = count_if(
+      *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
+  EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
+
+  // Check if variables are correctly aligned
+  for (Instruction &Instr : *LoopPreheader) {
+    if (!isa<AssumeInst>(Instr))
+      continue;
+    AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
+    if (AssumeInstruction->getNumTotalBundleOperands()) {
+      auto Bundle = AssumeInstruction->getOperandBundleAt(0);
+      if (Bundle.getTagName() == "align") {
+        EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
+        auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
+        EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
+      }
+    }
+  }
+}
+TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
+  OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
    // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars,
+                       /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
                         ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                         /* Safelen */ nullptr);
  
@@ -1834,12 +1898,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
  
  TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
    OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
  
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
    // Simd-ize the loop.
    OMPBuilder.applySimd(
-      CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
        /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
  
    OMPBuilder.finalize();
@@ -1870,13 +1935,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
  
  TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
    OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
  
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
-                       /* Simdlen */ nullptr,
-                       ConstantInt::get(Type::getInt32Ty(Ctx), 3));
+  OMPBuilder.applySimd(
+      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+      /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
  
    OMPBuilder.finalize();
    EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -1904,11 +1969,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
  
  TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
    OpenMPIRBuilder OMPBuilder(*M);
+  MapVector<Value *, Value *> AlignedVars;
  
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
-  // Simd-ize the loop.
-  OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
+                       OrderKind::OMP_ORDER_unknown,
                         ConstantInt::get(Type::getInt32Ty(Ctx), 2),
                         ConstantInt::get(Type::getInt32Ty(Ctx), 3));
  
@@ -1939,6 +2005,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
  TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
    OpenMPIRBuilder OMPBuilder(*M);
    IRBuilder<> Builder(BB);
+  MapVector<Value *, Value *> AlignedVars;
    AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
    AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
  
@@ -1953,7 +2020,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
    CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
  
    // Simd-ize the loop with if condition
-  OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown,
+  OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
                         ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                         /* Safelen */ nullptr);
  
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

index bd15fc4..5fa1593 100644 (file)
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -996,8 +996,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
    if (llvm::Optional<uint64_t> safelenVar = loop.getSafelen())
      safelen = builder.getInt64(safelenVar.value());
  
+  llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
    ompBuilder->applySimd(
-      loopInfo,
+      loopInfo, alignedVars,
        loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr())
                         : nullptr,
        llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);
author	Dominik Adamski <dominik.adamski@amd.com>
	Thu, 8 Sep 2022 11:39:18 +0000 (06:39 -0500)
committer	Dominik Adamski <dominik.adamski@amd.com>
	Tue, 18 Oct 2022 07:04:18 +0000 (02:04 -0500)
clang/lib/CodeGen/CGStmtOpenMP.cpp		patch \| blob \| history
clang/test/OpenMP/irbuilder_simd_aligned.cpp	[new file with mode: 0644]	patch \| blob
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h		patch \| blob \| history
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp		patch \| blob \| history
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp		patch \| blob \| history
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp		patch \| blob \| history