// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]
+// Note that OpenMPIRBuilder puts the trailing arguments in a different order:
+// arguments that are wrapped into additional pointers precede the other
+// arguments. This is expected and not problematic because both the call and the
+// function are generated from the same place, and the function is internal.
// ALL: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
// ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
-// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
+// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{64|32}}*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{64|32}}* %{{.+}}, i8*** [[ARGC_ADDR]])
// ALL: ret i32 0
// ALL-NEXT: }
// ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc)
// CHECK-DEBUG: store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
// CHECK-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
-// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
+// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64* %{{.+}}, i8*** [[ARGC_ADDR]])
// ALL-DEBUG: ret i32 0
// ALL-DEBUG-NEXT: }
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
-// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}})
+// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i{{64|32}}*{{.*}} %{{.+}}, i8*** [[ARGC_REF:%.*]])
// CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// ALL: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// CHECK-NEXT: unreachable
// CHECK-NEXT: }
// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i64 %{{.+}})
-// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}})
+// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i64* %{{.+}}, i8*** [[ARGC_REF:%.*]])
// CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// ALL-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
BasicBlock *InsertBB = Builder.GetInsertBlock();
Function *OuterFn = InsertBB->getParent();
+ // Save the outer alloca block because the insertion iterator may get
+ // invalidated and we still need this later.
+ BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
+
// Vector to remember instructions we used only during the modeling but which
// we want to delete at the end.
SmallVector<Instruction *, 4> ToBeDeleted;
// Add some fake uses for OpenMP provided arguments.
ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
- ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+ Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
+ ToBeDeleted.push_back(ZeroAddrUse);
// ThenBB
// |
if (&V == TIDAddr || &V == ZeroAddr)
return;
- SmallVector<Use *, 8> Uses;
+ SetVector<Use *> Uses;
for (Use &U : V.uses())
if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
if (ParallelRegionBlockSet.count(UserI->getParent()))
- Uses.push_back(&U);
+ Uses.insert(&U);
+
+ // __kmpc_fork_call expects extra arguments as pointers. If the input
+ // already has a pointer type, everything is fine. Otherwise, store the
+ // value onto stack and load it back inside the to-be-outlined region. This
+ // will ensure only the pointer will be passed to the function.
+ // FIXME: if there are more than 15 trailing arguments, they must be
+ // additionally packed in a struct.
+ Value *Inner = &V;
+ if (!V.getType()->isPointerTy()) {
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
+
+ Builder.restoreIP(OuterAllocaIP);
+ Value *Ptr =
+ Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
+
+ // Store to stack at end of the block that currently branches to the entry
+ // block of the to-be-outlined region.
+ Builder.SetInsertPoint(InsertBB,
+ InsertBB->getTerminator()->getIterator());
+ Builder.CreateStore(&V, Ptr);
+
+ // Load back next to allocations in the to-be-outlined region.
+ Builder.restoreIP(InnerAllocaIP);
+ Inner = Builder.CreateLoad(Ptr);
+ }
Value *ReplacementValue = nullptr;
CallInst *CI = dyn_cast<CallInst>(&V);
ReplacementValue = PrivTID;
} else {
Builder.restoreIP(
- PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
+ PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
assert(ReplacementValue &&
"Expected copy/create callback to set replacement value!");
if (ReplacementValue == &V)
UPtr->set(ReplacementValue);
};
+ // Reset the inner alloca insertion as it will be used for loading the values
+ // wrapped into pointers before passing them into the to-be-outlined region.
+ // Configure it to insert immediately after the fake use of zero address so
+ // that they are available in the generated body and so that the
+ // OpenMP-related values (thread ID and zero address pointers) remain leading
+ // in the argument list.
+ InnerAllocaIP = IRBuilder<>::InsertPoint(
+ ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
+
+ // Reset the outer alloca insertion point to the entry of the relevant block
+ // in case it was invalidated.
+ OuterAllocaIP = IRBuilder<>::InsertPoint(
+ OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
+
for (Value *Input : Inputs) {
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
PrivHelper(*Input);
DebugLoc DL;
};
+// Returns the value stored in the given allocation. Returns null if the given
+// value is not a result of an allocation, if no value is stored or if there is
+// more than one store.
+static Value *findStoredValue(Value *AllocaValue) {
+ Instruction *Alloca = dyn_cast<AllocaInst>(AllocaValue);
+ if (!Alloca)
+ return nullptr;
+ StoreInst *Store = nullptr;
+ for (Use &U : Alloca->uses()) {
+ if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
+ EXPECT_EQ(Store, nullptr);
+ Store = CandidateStore;
+ }
+ }
+ if (!Store)
+ return nullptr;
+ return Store->getValueOperand();
+};
+
TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
};
auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ Value &Orig, Value &Inner,
+ Value *&ReplacementValue) -> InsertPointTy {
++NumPrivatizedVars;
- if (!isa<AllocaInst>(VPtr)) {
- EXPECT_EQ(&VPtr, F->arg_begin());
- ReplacementValue = &VPtr;
+ if (!isa<AllocaInst>(Orig)) {
+ EXPECT_EQ(&Orig, F->arg_begin());
+ ReplacementValue = &Inner;
return CodeGenIP;
}
+ // Since the original value is an allocation, it has a pointer type and
+ // therefore no additional wrapping should happen.
+ EXPECT_EQ(&Orig, &Inner);
+
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
- Type *VTy = VPtr.getType()->getPointerElementType();
- Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
- ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Type *VTy = Inner.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
EXPECT_EQ(ForkCI->getArgOperand(1),
ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
- EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+ EXPECT_EQ(findStoredValue(ForkCI->getArgOperand(3)), F->arg_begin());
}
TEST_F(OpenMPIRBuilderTest, ParallelNested) {
};
auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ Value &Orig, Value &Inner,
+ Value *&ReplacementValue) -> InsertPointTy {
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
- Type *VTy = VPtr.getType()->getPointerElementType();
- Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
- ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Type *VTy = Inner.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
};
auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ Value &Orig, Value &Inner,
+ Value *&ReplacementValue) -> InsertPointTy {
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
- Type *VTy = VPtr.getType()->getPointerElementType();
- Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
- ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Type *VTy = Inner.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
};
auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+ Value &Orig, Value &Inner,
+ Value *&ReplacementValue) -> InsertPointTy {
++NumPrivatizedVars;
- if (!isa<AllocaInst>(VPtr)) {
- EXPECT_EQ(&VPtr, F->arg_begin());
- ReplacementValue = &VPtr;
+ if (!isa<AllocaInst>(Orig)) {
+ EXPECT_EQ(&Orig, F->arg_begin());
+ ReplacementValue = &Inner;
return CodeGenIP;
}
+ // Since the original value is an allocation, it has a pointer type and
+ // therefore no additional wrapping should happen.
+ EXPECT_EQ(&Orig, &Inner);
+
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
- Type *VTy = VPtr.getType()->getPointerElementType();
- Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
- ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+ Type *VTy = Inner.getType()->getPointerElementType();
+ Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+ ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
EXPECT_EQ(ForkCI->getArgOperand(1),
ConstantInt::get(Type::getInt32Ty(Ctx), 1));
- EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+ Value *StoredForkArg = findStoredValue(ForkCI->getArgOperand(3));
+ EXPECT_EQ(StoredForkArg, F->arg_begin());
EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
- EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
+ Value *StoredDirectArg = findStoredValue(DirectCI->getArgOperand(2));
+ EXPECT_EQ(StoredDirectArg, F->arg_begin());
}
TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
};
- auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
+ auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
Value *&) -> InsertPointTy {
++NumPrivatizedVars;
llvm_unreachable("No privatization callback call expected!");
}
}
+TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ Type *I32Ty = Type::getInt32Ty(M->getContext());
+ Type *I32PtrTy = Type::getInt32PtrTy(M->getContext());
+ Type *StructTy = StructType::get(I32Ty, I32PtrTy);
+ Type *StructPtrTy = StructTy->getPointerTo();
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+ FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
+ FunctionCallee TakeI32Func =
+ M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
+ FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy);
+ FunctionCallee TakeI32PtrFunc =
+ M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy);
+ FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
+ FunctionCallee TakeStructFunc =
+ M->getOrInsertFunction("take_struct", VoidTy, StructTy);
+ FunctionCallee RetStructPtrFunc =
+ M->getOrInsertFunction("ret_structptr", StructPtrTy);
+ FunctionCallee TakeStructPtrFunc =
+ M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy);
+ Value *I32Val = Builder.CreateCall(RetI32Func);
+ Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
+ Value *StructVal = Builder.CreateCall(RetStructFunc);
+ Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
+
+ Instruction *Internal;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &ContinuationBB) {
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Builder.restoreIP(CodeGenIP);
+ Internal = Builder.CreateCall(TakeI32Func, I32Val);
+ Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
+ Builder.CreateCall(TakeStructFunc, StructVal);
+ Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
+ };
+ auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
+ Value &Inner, Value *&ReplacementValue) {
+ ReplacementValue = &Inner;
+ return CodeGenIP;
+ };
+ auto FiniCB = [](InsertPointTy) {};
+
+ IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
+ IRBuilder<>::InsertPoint AfterIP =
+ OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false);
+ Builder.restoreIP(AfterIP);
+ Builder.CreateRetVoid();
+
+ OMPBuilder.finalize();
+
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+ Function *OutlinedFn = Internal->getFunction();
+
+ Type *Arg2Type = OutlinedFn->getArg(2)->getType();
+ EXPECT_TRUE(Arg2Type->isPointerTy());
+ EXPECT_EQ(Arg2Type->getPointerElementType(), I32Ty);
+
+ // Arguments that need to be passed through pointers and reloaded will get
+ // used earlier in the functions and therefore will appear first in the
+ // argument list after outlining.
+ Type *Arg3Type = OutlinedFn->getArg(3)->getType();
+ EXPECT_TRUE(Arg3Type->isPointerTy());
+ EXPECT_EQ(Arg3Type->getPointerElementType(), StructTy);
+
+ Type *Arg4Type = OutlinedFn->getArg(4)->getType();
+ EXPECT_EQ(Arg4Type, I32PtrTy);
+
+ Type *Arg5Type = OutlinedFn->getArg(5)->getType();
+ EXPECT_EQ(Arg5Type, StructPtrTy);
+}
+
TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);