/// The return block for the overall function.
BasicBlock *EndBB = nullptr;
+ /// A set containing the different GVN store sets needed. Each array contains
+ /// a sorted list of the different values that need to be stored into output
+ /// registers.
+ DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
+
/// Flag for whether the \ref ArgumentTypes have been defined after the
/// extraction of the first region.
bool InputTypesSet = false;
/// \param [in,out] NotSame contains the global value numbers where the
/// constant is not always the same, and must be passed in as an argument.
void findSameConstants(DenseSet<unsigned> &NotSame);
+
+ /// Record the GVN store set of every region in this group in
+ /// OutputGVNCombinations. If more than one distinct combination is
+ /// recorded, append a 32-bit integer type to ArgumentTypes; that extra
+ /// argument is used to choose between the output paths.
+ ///
+ /// \param [in] M - The module we are outlining from. Its context is used to
+ /// create the integer type.
+ void collectGVNStoreSets(Module &M);
};
/// Move the contents of \p SourceBB to before the last instruction of \p
collectRegionsConstants(*Region, GVNToConstant, NotSame);
}
+/// Gather the distinct GVN store sets used by the regions of this group and,
+/// when the regions do not all share one set, reserve an extra argument type
+/// for selecting the output path.
+///
+/// \param [in] M - The module being outlined from; supplies the context in
+/// which the integer argument type is created.
+void OutlinableGroup::collectGVNStoreSets(Module &M) {
+  for (OutlinableRegion *Region : Regions) {
+    OutputGVNCombinations.insert(Region->GVNStores);
+  }
+
+  // With at most one combination there is nothing to choose between, so no
+  // additional argument is required.
+  if (OutputGVNCombinations.size() <= 1)
+    return;
+
+  // Several combinations exist: add an argument that decides which output
+  // path to take in the basic block. It only feeds a switch statement, so an
+  // integer is all that is needed.
+  ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+}
+
Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
unsigned FunctionNameSuffix) {
assert(!Group.OutlinedFunction && "Function is already defined!");
for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
if (AggArgIdx == AggFunc->arg_size() - 1 &&
- Group.ArgumentTypes.size() > Group.NumAggregateInputs) {
+ Group.OutputGVNCombinations.size() > 1) {
// If we are on the last argument, and we need to differentiate between
// output blocks, add an integer to the argument list to determine
// what block to take
Call);
// It is possible that the call to the outlined function is either the first
- // instruction in the new block, the last instruction, or both. If either of
- // these is the case, we need to make sure that we replace the instruction in
- // the IRInstructionData struct with the new call.
+ // instruction in the new block, the last instruction, or both. If either of
+ // these is the case, we need to make sure that we replace the instruction in
+ // the IRInstructionData struct with the new call.
CallInst *OldCall = Region.Call;
if (Region.NewFront->Inst == OldCall)
Region.NewFront->Inst = Call;
return RelevantInstructions;
}
+/// Search the existing output blocks for one that performs exactly the same
+/// stores as \p OutputBB, so that the new block need not be kept.
+///
+/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputStoreBBs [in] The existing output blocks.
+/// \returns the index into \p OutputStoreBBs of the first block whose
+/// non-branch instructions are identical to those of \p OutputBB, or None if
+/// no such block exists.
+Optional<unsigned>
+findDuplicateOutputBlock(BasicBlock *OutputBB,
+                         ArrayRef<BasicBlock *> OutputStoreBBs) {
+  for (unsigned Idx = 0, NumBlocks = OutputStoreBBs.size(); Idx != NumBlocks;
+       ++Idx) {
+    BasicBlock *Candidate = OutputStoreBBs[Idx];
+
+    // A candidate is expected to hold exactly one instruction more than
+    // OutputBB (presumably its terminating branch, which is skipped below).
+    // Any other size cannot be a match.
+    if (Candidate->size() - 1 != OutputBB->size())
+      continue;
+
+    // Walk both blocks in lockstep, ignoring the candidate's branch.
+    bool AllIdentical = true;
+    BasicBlock::iterator NewIt = OutputBB->begin();
+    for (Instruction &CandidateInst : *Candidate) {
+      if (isa<BranchInst>(&CandidateInst))
+        continue;
+
+      if (!CandidateInst.isIdenticalTo(&(*NewIt))) {
+        AllIdentical = false;
+        break;
+      }
+
+      ++NewIt;
+    }
+
+    if (AllIdentical)
+      return Idx;
+  }
+
+  return None;
+}
+
/// For the outlined section, move needed the StoreInsts for the output
-/// registers into their own block. Then, determine if there is a duplicate
+/// registers into their own block. Then, determine if there is a duplicate
/// output block already created.
///
/// \param [in] OG - The OutlinableGroup of regions to be outlined.
// be contained in a store, we replace the uses of the value with the value
// from the overall function, so that the store is storing the correct
// value from the overall function.
-
DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
OutputStoreBBs.end());
+ ExcludeBBs.insert(OutputBB);
std::vector<Instruction *> ExtractedFunctionInsts =
collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
std::vector<Instruction *> OverallFunctionInsts =
}
assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
+
+ // If the size of the block is 0, then there are no stores, and we do not
+ // need to save this block.
+ if (OutputBB->size() == 0) {
+ Region.OutputBlockNum = -1;
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ // Determine if there is a duplicate block.
+ Optional<unsigned> MatchingBB =
+ findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+
+ // If there is, we remove the new output block. If there is not,
+ // we add it to our list of output blocks.
+ if (MatchingBB.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Set output block for region in function"
+ << Region.ExtractedFunction << " to "
+ << MatchingBB.getValue());
+
+ Region.OutputBlockNum = MatchingBB.getValue();
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ Region.OutputBlockNum = OutputStoreBBs.size();
+
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *OutputBB);
+ OutputStoreBBs.push_back(OutputBB);
+ BranchInst::Create(EndBB, OutputBB);
}
/// Create the switch statement for outlined function to differentiate between
/// \param [in,out] OutputStoreBBs - The existing output blocks.
void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
ArrayRef<BasicBlock *> OutputStoreBBs) {
- Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), ReturnBlock,
- OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
+ // We only need the switch statement if there is more than one store
+ // combination.
+ if (OG.OutputGVNCombinations.size() > 1) {
+ Function *AggFunc = OG.OutlinedFunction;
+ // Create a final block
+ BasicBlock *ReturnBlock =
+ BasicBlock::Create(M.getContext(), "final_block", AggFunc);
+ Instruction *Term = EndBB->getTerminator();
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function with
+ // a fall through to the new return block
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (BasicBlock *BB : OutputStoreBBs) {
+ SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
+ BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
+ return;
+ }
+
+ // If there needs to be stores, move them from the output block to the end
+ // block to save on branching instructions.
+ if (OutputStoreBBs.size() == 1) {
+ LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
+ << *OG.OutlinedFunction << "\n");
+ BasicBlock *OutputBlock = OutputStoreBBs[0];
+ Instruction *Term = OutputBlock->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBlock, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBlock->eraseFromParent();
}
return;
replaceArgumentUses(*CurrentOS, NewBB);
replaceConstants(*CurrentOS);
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) {
+ // If the new basic block has no new stores, we can erase it from the module.
+ // If it does, we create a branch instruction to the last basic block from the
+ // new one.
+ if (NewBB->size() == 0) {
+ CurrentOS->OutputBlockNum = -1;
+ NewBB->eraseFromParent();
+ } else {
BranchInst::Create(CurrentGroup.EndBB, NewBB);
OutputStoreBBs.push_back(NewBB);
- } else
- NewBB->eraseFromParent();
+ }
// Replace the call to the extracted function with the outlined function.
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
CurrentGroup.OutlinedFunction);
replaceArgumentUses(*CurrentOS, NewBB);
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- CurrentOS->OutputBlockNum = OutputStoreBBs.size();
- OutputStoreBBs.push_back(NewBB);
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
- OutputStoreBBs);
- } else
- NewBB->eraseFromParent();
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
+ CurrentGroup.EndBB, OutputMappings,
+ OutputStoreBBs);
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
}
// Create a switch statement to handle the different output schemes.
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs)
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
OutlinedFunctionNum++;
}
if (CurrentGroup.Regions.empty())
continue;
- // We are adding an extracted argument to decide between which output path
- // to use in the basic block. It is used in a switch statement and only
- // needs to be an integer.
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs)
- CurrentGroup.ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+ CurrentGroup.collectGVNStoreSets(M);
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]])
; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]]
; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]])
; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]])
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST4]])
; CHECK-NEXT: [[LT_CAST5:%.*]] = bitcast i32* [[DOTLOC2]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST5]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]])
; CHECK-NEXT: [[ADD2_RELOAD:%.*]] = load i32, i32* [[ADD2_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD3:%.*]] = load i32, i32* [[DOTLOC2]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST4]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
ret void
}
-; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]], i32 [[ARG5:%.*]]) #1 {
+; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]]) #1 {
; CHECK: entry_after_outline.exitStub:
-; CHECK-NEXT: switch i32 [[ARG5]], label [[BLOCK:%.*]] [
-; CHECK-NEXT: i32 0, label %[[BLOCK_0:.*]]
-; CHECK-NEXT: i32 1, label %[[BLOCK_1:.*]]
+; CHECK-NEXT: store i32 [[ADD:%.*]], i32* [[ARG3]], align 4
+; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[ARG4]], align 4
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG1]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ADD]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG2]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARG2]], align 4
-
-; CHECK: [[BLOCK_0]]:
-; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4
-; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4
-
-; CHECK: [[BLOCK_1]]:
-; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4
-; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4
+; CHECK-NEXT: [[TMP2]] = load i32, i32* [[ARG2]], align 4