[OpenMP][IRBuilder] Support allocas in nested parallel regions
authorJohannes Doerfert <johannes@jdoerfert.de>
Fri, 19 Jun 2020 14:38:04 +0000 (09:38 -0500)
committerJohannes Doerfert <johannes@jdoerfert.de>
Thu, 30 Jul 2020 15:19:39 +0000 (10:19 -0500)
We need to keep track of the alloca insertion point (which we already
communicate via the callback to the user) as we place allocas as well.

Reviewed By: fghanim, SouraVX

Differential Revision: https://reviews.llvm.org/D82470

clang/lib/CodeGen/CGStmtOpenMP.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

index 0ee1133..df1cc16 100644 (file)
@@ -1707,9 +1707,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
 
     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
-    Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
-                                                FiniCB, IfCond, NumThreads,
-                                                ProcBind, S.hasCancel()));
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+    Builder.restoreIP(
+        OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
     return;
   }
 
index 95eed59..f813a73 100644 (file)
@@ -156,6 +156,7 @@ public:
   /// Generator for '#omp parallel'
   ///
   /// \param Loc The insert and source location description.
+  /// \param AllocaIP The insertion points to be used for alloca instructions.
   /// \param BodyGenCB Callback that will generate the region code.
   /// \param PrivCB Callback to copy a given variable (think copy constructor).
   /// \param FiniCB Callback to finalize variable copies.
@@ -166,10 +167,11 @@ public:
   ///
   /// \returns The insertion position *after* the parallel.
   IRBuilder<>::InsertPoint
-  CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
-                 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
-                 Value *IfCondition, Value *NumThreads,
-                 omp::ProcBindKind ProcBind, bool IsCancellable);
+  CreateParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
+                 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+                 FinalizeCallbackTy FiniCB, Value *IfCondition,
+                 Value *NumThreads, omp::ProcBindKind ProcBind,
+                 bool IsCancellable);
 
   /// Generator for '#omp flush'
   ///
index 9468a3a..a5fe4ec 100644 (file)
@@ -394,9 +394,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
 }
 
 IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
-    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
-    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
-    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
+    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
+    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
+    omp::ProcBindKind ProcBind, bool IsCancellable) {
   if (!updateToLocation(Loc))
     return Loc.IP;
 
@@ -429,7 +430,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   // we want to delete at the end.
   SmallVector<Instruction *, 4> ToBeDeleted;
 
-  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
+  // Change the location to the outer alloca insertion point to create and
+  // initialize the allocas we pass into the parallel region.
+  Builder.restoreIP(OuterAllocaIP);
   AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
   AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
 
@@ -481,9 +484,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
 
   // Generate the privatization allocas in the block that will become the entry
   // of the outlined function.
-  InsertPointTy AllocaIP(PRegEntryBB,
-                         PRegEntryBB->getTerminator()->getIterator());
-  Builder.restoreIP(AllocaIP);
+  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
+  InsertPointTy InnerAllocaIP = Builder.saveIP();
+
   AllocaInst *PrivTIDAddr =
       Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
   Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
@@ -512,7 +515,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
   // Let the caller create the body.
   assert(BodyGenCB && "Expected body generation callback!");
   InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
-  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
+  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
 
   LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n");
 
@@ -671,7 +674,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
       ReplacementValue = PrivTID;
     } else {
       Builder.restoreIP(
-          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
+          PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
       assert(ReplacementValue &&
              "Expected copy/create callback to set replacement value!");
       if (ReplacementValue == &V)
@@ -686,6 +689,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
     LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
     PrivHelper(*Input);
   }
+  LLVM_DEBUG({
+    for (Value *Output : Outputs)
+      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
+  });
   assert(Outputs.empty() &&
          "OpenMP outlining should not produce live-out values!");
 
index 2ba9d85..edd2c8f 100644 (file)
@@ -6,13 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "gtest/gtest.h"
@@ -360,9 +361,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
 
   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
 
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
   IRBuilder<>::InsertPoint AfterIP =
-      OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
-                                nullptr, OMP_PROC_BIND_default, false);
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -400,6 +403,205 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
 }
 
+TEST_F(OpenMPIRBuilderTest, ParallelNested) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  unsigned NumInnerBodiesGenerated = 0;
+  unsigned NumOuterBodiesGenerated = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumInnerBodiesGenerated;
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+  auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumOuterBodiesGenerated;
+    Builder.restoreIP(CodeGenIP);
+    BasicBlock *CGBB = CodeGenIP.getBlock();
+    BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
+    CGBB->getTerminator()->eraseFromParent();
+    ;
+
+    IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
+        InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP);
+    Builder.CreateBr(NewBB);
+  };
+
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+  EXPECT_EQ(NumInnerBodiesGenerated, 1U);
+  EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 2U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize();
+
+  EXPECT_EQ(M->size(), 5U);
+  for (Function &OutlinedFn : *M) {
+    if (F == &OutlinedFn || OutlinedFn.isDeclaration())
+      continue;
+    EXPECT_FALSE(verifyModule(*M, &errs()));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+    EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+    EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+    EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
+    User *Usr = OutlinedFn.user_back();
+    ASSERT_TRUE(isa<ConstantExpr>(Usr));
+    CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+    ASSERT_NE(ForkCI, nullptr);
+
+    EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+    EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+    EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+    EXPECT_EQ(ForkCI->getArgOperand(1),
+              ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+    EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+  }
+}
+
+TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  unsigned NumInnerBodiesGenerated = 0;
+  unsigned NumOuterBodiesGenerated = 0;
+  unsigned NumFinalizationPoints = 0;
+
+  auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumInnerBodiesGenerated;
+  };
+
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+    // Trivial copy (=firstprivate).
+    Builder.restoreIP(AllocaIP);
+    Type *VTy = VPtr.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(V, ReplacementValue);
+    return CodeGenIP;
+  };
+
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+
+  auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            BasicBlock &ContinuationIP) {
+    ++NumOuterBodiesGenerated;
+    Builder.restoreIP(CodeGenIP);
+    BasicBlock *CGBB = CodeGenIP.getBlock();
+    BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
+    BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
+    CGBB->getTerminator()->eraseFromParent();
+    ;
+    NewBB1->getTerminator()->eraseFromParent();
+    ;
+
+    IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel(
+        InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP1);
+    Builder.CreateBr(NewBB1);
+
+    IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel(
+        InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
+        FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+    Builder.restoreIP(AfterIP2);
+    Builder.CreateBr(NewBB2);
+  };
+
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
+
+  EXPECT_EQ(NumInnerBodiesGenerated, 2U);
+  EXPECT_EQ(NumOuterBodiesGenerated, 1U);
+  EXPECT_EQ(NumFinalizationPoints, 3U);
+
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize();
+
+  EXPECT_EQ(M->size(), 6U);
+  for (Function &OutlinedFn : *M) {
+    if (F == &OutlinedFn || OutlinedFn.isDeclaration())
+      continue;
+    EXPECT_FALSE(verifyModule(*M, &errs()));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
+    EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
+    EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
+
+    EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
+    EXPECT_EQ(OutlinedFn.arg_size(), 2U);
+
+    unsigned NumAllocas = 0;
+    for (Instruction &I : instructions(OutlinedFn))
+      NumAllocas += isa<AllocaInst>(I);
+    EXPECT_EQ(NumAllocas, 1U);
+
+    EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
+    User *Usr = OutlinedFn.user_back();
+    ASSERT_TRUE(isa<ConstantExpr>(Usr));
+    CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
+    ASSERT_NE(ForkCI, nullptr);
+
+    EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
+    EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
+    EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
+    EXPECT_EQ(ForkCI->getArgOperand(1),
+              ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
+    EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
+  }
+}
+
 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
@@ -460,9 +662,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
     // No destructors.
   };
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
-      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
-      nullptr, OMP_PROC_BIND_default, false);
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                Builder.CreateIsNotNull(F->arg_begin()),
+                                nullptr, OMP_PROC_BIND_default, false);
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
@@ -585,9 +790,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
                        {Builder.getInt32(NumFinalizationPoints)});
   };
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
-      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
-      nullptr, OMP_PROC_BIND_default, true);
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                Builder.CreateIsNotNull(F->arg_begin()),
+                                nullptr, OMP_PROC_BIND_default, true);
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 0U);