return true;
}
-/// Given an unconditional branch that goes to BBEnd,
-/// check whether BBEnd has only two predecessors and the other predecessor
-/// ends with an unconditional branch. If it is true, sink any common code
-/// in the two predecessors to BBEnd.
-static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
- assert(BI1->isUnconditional());
- BasicBlock *BB1 = BI1->getParent();
- BasicBlock *BBEnd = BI1->getSuccessor(0);
-
- // Check that BBEnd has two predecessors and the other predecessor ends with
- // an unconditional branch.
- pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
- BasicBlock *Pred0 = *PI++;
- if (PI == PE) // Only one predecessor.
- return false;
- BasicBlock *Pred1 = *PI++;
- if (PI != PE) // More than two predecessors.
+// Return true if V0 and V1 are equivalent. This handles the obvious cases
+// where V0 == V1 and V0 and V1 are both identical instructions, but also
+// handles loads and stores with identical operands.
+//
+// Because determining if two memory instructions are equivalent
+// depends on control flow, the \c At0 and \c At1 parameters specify a
+// location for the query. This function is essentially answering the
+// query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1
+// equivalent?". In practice this means checking that moving V0 to At0
+// doesn't cross any other memory instructions.
+static bool areValuesTriviallySame(Value *V0, BasicBlock::const_iterator At0,
+ Value *V1, BasicBlock::const_iterator At1) {
+ if (V0 == V1)
+ return true;
+
+ // Also check for instructions that are identical but not pointer-identical.
+ // This can include load instructions that haven't been CSE'd.
+ if (!isa<Instruction>(V0) || !isa<Instruction>(V1))
return false;
- BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
- BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
- if (!BI2 || !BI2->isUnconditional())
+ const auto *I0 = cast<Instruction>(V0);
+ const auto *I1 = cast<Instruction>(V1);
+ if (!I0->isIdenticalToWhenDefined(I1))
return false;
- // Gather the PHI nodes in BBEnd.
- SmallDenseMap<std::pair<Value *, Value *>, PHINode *> JointValueMap;
- Instruction *FirstNonPhiInBBEnd = nullptr;
- for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
- JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
- } else {
- FirstNonPhiInBBEnd = &*I;
- break;
- }
- }
- if (!FirstNonPhiInBBEnd)
+ if (!I0->mayReadOrWriteMemory())
+ return true;
+
+ // Instructions that may read or write memory have extra restrictions. We
+ // must ensure we don't treat %a and %b as equivalent in code such as:
+ //
+ // %a = load %x
+ // store %x, 1
+ // if (%c) {
+ // %b = load %x
+ // %d = add %b, 1
+ // } else {
+ // %d = add %a, 1
+ // }
+
+ // Be conservative. We don't want to search the entire CFG between def
+ // and use; if the def isn't in the same block as the use just bail.
+ if (I0->getParent() != At0->getParent() ||
+ I1->getParent() != At1->getParent())
return false;
- // This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. We scan backward for obviously identical
- // instructions in an identical order.
- BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
- RE1 = BB1->getInstList().rend(),
- RI2 = BB2->getInstList().rbegin(),
- RE2 = BB2->getInstList().rend();
- // Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
- ++RI1;
- if (RI1 == RE1)
+ // Again, be super conservative. Ideally we'd be able to query AliasAnalysis
+ // but we currently don't have that available.
+ auto WritesMemory = [](const Instruction &I) {
+ return I.mayReadOrWriteMemory();
+ };
+ if (std::any_of(std::next(I0->getIterator()), At0, WritesMemory))
return false;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
- ++RI2;
- if (RI2 == RE2)
+ if (std::any_of(std::next(I1->getIterator()), At1, WritesMemory))
return false;
- // Skip the unconditional branches.
- ++RI1;
- ++RI2;
+ return true;
+}
- bool Changed = false;
- while (RI1 != RE1 && RI2 != RE2) {
- // Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
- ++RI1;
- if (RI1 == RE1)
- return Changed;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
- ++RI2;
- if (RI2 == RE2)
- return Changed;
+// Is it legal to place a variable in operand \c OpIdx of \c I?
+// FIXME: This should be promoted to Instruction.
+static bool canReplaceOperandWithVariable(const Instruction *I,
+ unsigned OpIdx) {
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
- Instruction *I1 = &*RI1, *I2 = &*RI2;
- auto InstPair = std::make_pair(I1, I2);
- // I1 and I2 should have a single use in the same PHI node, and they
- // perform the same operation.
- // Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I1) || isa<PHINode>(I2) || isa<TerminatorInst>(I1) ||
- isa<TerminatorInst>(I2) || I1->isEHPad() || I2->isEHPad() ||
- isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
- I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
- I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
- !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair))
- return Changed;
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ // FIXME: many arithmetic intrinsics have no issue taking a
+ // variable, however it's hard to distingish these from
+ // specials such as @llvm.frameaddress that require a constant.
+ return !isa<IntrinsicInst>(I);
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::Alloca:
+ return false;
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1);
+ return !It->isStructTy();
+ }
+}
- // Check whether we should swap the operands of ICmpInst.
- // TODO: Add support of communativity.
- ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
- bool SwapOpnds = false;
- if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
- ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
- (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
- ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
- ICmp2->swapOperands();
- SwapOpnds = true;
- }
- if (!I1->isSameOperationAs(I2)) {
- if (SwapOpnds)
- ICmp2->swapOperands();
- return Changed;
- }
+// All blocks in Blocks unconditionally jump to a common successor. Analyze
+// the last non-terminator instruction in each block and return true if it would
+// be possible to sink them into their successor, creating one common
+// instruction instead. Set NumPHIsRequired to the number of PHI nodes that
+// would need to be created during sinking.
+static bool canSinkLastInstruction(ArrayRef<BasicBlock*> Blocks,
+ unsigned &NumPHIsRequired) {
+ SmallVector<Instruction*,4> Insts;
+ for (auto *BB : Blocks) {
+ if (BB->getTerminator() == &BB->front())
+ // Block was empty.
+ return false;
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ }
- // The operands should be either the same or they need to be generated
- // with a PHI node after sinking. We only handle the case where there is
- // a single pair of different operands.
- Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
- unsigned Op1Idx = ~0U;
- for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
- if (I1->getOperand(I) == I2->getOperand(I))
- continue;
- // Early exit if we have more-than one pair of different operands or if
- // we need a PHI node to replace a constant.
- if (Op1Idx != ~0U || isa<Constant>(I1->getOperand(I)) ||
- isa<Constant>(I2->getOperand(I))) {
- // If we can't sink the instructions, undo the swapping.
- if (SwapOpnds)
- ICmp2->swapOperands();
- return Changed;
- }
- DifferentOp1 = I1->getOperand(I);
- Op1Idx = I;
- DifferentOp2 = I2->getOperand(I);
+ // Prune out obviously bad instructions to move. Any non-store instruction
+ // must have exactly one use, and we check later that use is by a single,
+ // common PHI instruction in the successor.
+ for (auto *I : Insts) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return false;
+ // Apart from loads and stores, we won't move anything that could
+ // change memory or have sideeffects.
+ if (!isa<StoreInst>(I) && !isa<LoadInst>(I) &&
+ (I->mayHaveSideEffects() || I->mayHaveSideEffects()))
+ return false;
+ // Everything must have only one use too, apart from stores which
+ // have no uses.
+ if (!isa<StoreInst>(I) && !I->hasOneUse())
+ return false;
+ }
+
+ const Instruction *I0 = Insts.front();
+ for (auto *I : Insts)
+ if (!I->isSameOperationAs(I0))
+ return false;
+
+ // If this isn't a store, check the only user is a single PHI.
+ if (!isa<StoreInst>(I0)) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ if (!PNUse ||
+ !all_of(Insts, [&PNUse](const Instruction *I) {
+ return *I->user_begin() == PNUse;
+ }))
+ return false;
+ }
+
+ NumPHIsRequired = 0;
+ for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
+ if (I0->getOperand(OI)->getType()->isTokenTy())
+ // Don't touch any operand of token type.
+ return false;
+ auto SameAsI0 = [&I0, OI](const Instruction *I) {
+ return areValuesTriviallySame(I->getOperand(OI), I->getIterator(),
+ I0->getOperand(OI), I0->getIterator());
+ };
+ if (!all_of(Insts, SameAsI0)) {
+ if (!canReplaceOperandWithVariable(I0, OI))
+ // We can't create a PHI from this GEP.
+ return false;
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI != 0)
+ // Don't create indirect calls!
+ // FIXME: if the call was *already* indirect, we should do this.
+ return false;
+ ++NumPHIsRequired;
}
+ }
+ return true;
+}
- DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
- DEBUG(dbgs() << " " << *I2 << "\n");
-
- // We insert the pair of different operands to JointValueMap and
- // remove (I1, I2) from JointValueMap.
- if (Op1Idx != ~0U) {
- auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
- if (!NewPN) {
- NewPN =
- PHINode::Create(DifferentOp1->getType(), 2,
- DifferentOp1->getName() + ".sink", &BBEnd->front());
- NewPN->addIncoming(DifferentOp1, BB1);
- NewPN->addIncoming(DifferentOp2, BB2);
- DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
- }
- // I1 should use NewPN instead of DifferentOp1.
- I1->setOperand(Op1Idx, NewPN);
+// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// instruction of every block in Blocks to their common successor, commoning
+// into one instruction.
+static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
+ unsigned Dummy;
+ (void)Dummy;
+ assert(canSinkLastInstruction(Blocks, Dummy) &&
+ "Must analyze before transforming!");
+ auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+
+ // canSinkLastInstruction returning true guarantees that every block has at
+ // least one non-terminator instruction.
+ SmallVector<Instruction*,4> Insts;
+ for (auto *BB : Blocks)
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+
+ // We don't need to do any checking here; canSinkLastInstruction should have
+ // done it all for us.
+ Instruction *I0 = Insts.front();
+ SmallVector<Value*, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ // This check is different to that in canSinkLastInstruction. There, we
+ // cared about the global view once simplifycfg (and instcombine) have
+ // completed - it takes into account PHIs that become trivially
+ // simplifiable. However here we need a more local view; if an operand
+ // differs we create a PHI and rely on instcombine to clean up the very
+ // small mess we may make.
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
}
- PHINode *OldPN = JointValueMap[InstPair];
- JointValueMap.erase(InstPair);
-
- // We need to update RE1 and RE2 if we are going to sink the first
- // instruction in the basic block down.
- bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front());
- // Sink the instruction.
- BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
- BB1->getInstList(), I1);
- if (!OldPN->use_empty())
- OldPN->replaceAllUsesWith(I1);
- OldPN->eraseFromParent();
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->intersectOptionalDataWith(I2);
- // TODO: Use combineMetadata here to preserve what metadata we can
- // (analogous to the hoisting case above).
- I2->eraseFromParent();
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ if (!isa<StoreInst>(I0)) {
+ // canSinkLastInstruction checked that all instructions were used by
+ // one and only one PHI node. Find that now, RAUW it to our common
+ // instruction and nuke it.
+ assert(I0->hasOneUse());
+ auto *PN = cast<PHINode>(*I0->user_begin());
+ PN->replaceAllUsesWith(I0);
+ PN->eraseFromParent();
+ }
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+}
+
+/// Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
- if (UpdateRE1)
- RE1 = BB1->getInstList().rend();
- if (UpdateRE2)
- RE2 = BB2->getInstList().rend();
- FirstNonPhiInBBEnd = &*I1;
+ SmallVector<BasicBlock*,4> Blocks;
+ for (auto *BB : predecessors(BBEnd))
+ Blocks.push_back(BB);
+ if (Blocks.size() != 2 ||
+ !all_of(Blocks, [](const BasicBlock *BB) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ return BI && BI->isUnconditional();
+ }))
+ return false;
+
+ bool Changed = false;
+ unsigned NumPHIsToInsert;
+ while (canSinkLastInstruction(Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1) {
+ sinkLastInstruction(Blocks);
NumSinkCommons++;
Changed = true;
}
; CHECK: call
; CHECK: add
; CHECK-NOT: br
+
+define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test4
+; CHECK: select
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test5
+; CHECK: store volatile
+; CHECK: store
+
+define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test6
+; CHECK: select
+; CHECK: store volatile
+; CHECK-NOT: store
+
+define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %z = load volatile i32, i32* %y
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test7
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; %z and %w are in different blocks. We shouldn't sink the add because
+; there may be intervening memory instructions.
+define i32 @test8(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ %z = load volatile i32, i32* %y
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test8
+; CHECK: add
+; CHECK: add
+
+; The extra store in %if.then means %z and %w are not equivalent.
+define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ store i32 7, i32* %p
+ %z = load volatile i32, i32* %y
+ store i32 6, i32* %p
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test9
+; CHECK: add
+; CHECK: add
+
+%struct.anon = type { i32, i32 }
+
+; The GEP indexes a struct type so cannot have a variable last index.
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 5
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+ store volatile i32 %x, i32* %gepa
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %x, 6
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ store volatile i32 %x, i32* %gepb
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test10
+; CHECK: getelementptr
+; CHECK: getelementptr
+; CHECK: phi
+; CHECK: store volatile
+
+; The shufflevector's mask operand cannot be merged in a PHI.
+define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1>
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0>
+ br label %if.end
+
+if.end:
+ %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test11
+; CHECK: shufflevector
+; CHECK: shufflevector
+
+; We can't common an intrinsic!
+define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = call i32 @llvm.ctlz.i32(i32 %x)
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = call i32 @llvm.cttz.i32(i32 %x)
+ br label %if.end
+
+if.end:
+ %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+declare i32 @llvm.ctlz.i32(i32 %x) readnone
+declare i32 @llvm.cttz.i32(i32 %x) readnone
+
+; CHECK-LABEL: test12
+; CHECK: call i32 @llvm.ctlz
+; CHECK: call i32 @llvm.cttz
+