#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
const Value *SavedLHS;
const Value *SavedRHS;
- /// The current mapping from old local values to new local values.
+ // The current mapping from old local values to new local values.
DenseMap<const Value *, const Value *> Values;
- /// The current mapping from old blocks to new blocks.
+ // The current mapping from old blocks to new blocks.
DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
+ // The tentative mapping from old local values while comparing a pair of
+ // basic blocks. Once the pair has been processed, the tentative mapping is
+ // committed to the Values map.
DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
+ // Equivalence Assumptions
+ //
+ // For basic blocks in loops, some values in phi nodes may depend on
+ // values from not yet processed basic blocks in the loop. When encountering
+ // such values, we optimistically asssume their equivalence and store this
+ // assumption in a BlockDiffCandidate for the pair of compared BBs.
+ //
+ // Once we have diffed all BBs, for every BlockDiffCandidate, we check all
+ // stored assumptions using the Values map that stores proven equivalences
+ // between the old and new values, and report a diff if an assumption cannot
+ // be proven to be true.
+ //
+ // Note that after having made an assumption, all further determined
+ // equivalences implicitly depend on that assumption. These will not be
+ // reverted or reported if the assumption proves to be false, because these
+ // are considered indirect diffs caused by earlier direct diffs.
+ //
+ // We aim to avoid false negatives in llvm-diff, that is, ensure that
+ // whenever no diff is reported, the functions are indeed equal. If
+ // assumptions were made, this is not entirely clear, because in principle we
+ // could end up with a circular proof where the proof of equivalence of two
+ // nodes is depending on the assumption of their equivalence.
+ //
+ // To see that assumptions do not add false negatives, note that if we do not
+ // report a diff, this means that there is an equivalence mapping between old
+ // and new values that is consistent with all assumptions made. The circular
+ // dependency that exists on an IR value level does not exist at run time,
+ // because the values selected by the phi nodes must always already have been
+ // computed. Hence, we can prove equivalence of the old and new functions by
+ // considering step-wise parallel execution, and incrementally proving
+ // equivalence of every new computed value. Another way to think about it is
+ // to imagine cloning the loop BBs for every iteration, turning the loops
+ // into (possibly infinite) DAGs, and proving equivalence by induction on the
+ // iteration, using the computed value mapping.
+
+ // The class BlockDiffCandidate stores pairs which either have already been
+ // proven to differ, or pairs whose equivalence depends on assumptions to be
+ // verified later.
+ struct BlockDiffCandidate {
+ const BasicBlock *LBB;
+ const BasicBlock *RBB;
+ // Maps old values to assumed-to-be-equivalent new values
+ SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions;
+ // If set, we already know the blocks differ.
+ bool KnownToDiffer;
+ };
+
+ // List of block diff candidates in the order found by processing.
+ // We generate reports in this order.
+ // For every LBB, there may only be one corresponding RBB.
+ SmallVector<BlockDiffCandidate> BlockDiffCandidates;
+ // Maps LBB to the index of its BlockDiffCandidate, if existing.
+ DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices;
+
+ // Note: Every LBB must always be queried together with the same RBB.
+ // The returned reference is not permanently valid and should not be stored.
+ BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB,
+ const BasicBlock *RBB) {
+ auto It = BlockDiffCandidateIndices.find(LBB);
+ // Check if LBB already has a diff candidate
+ if (It == BlockDiffCandidateIndices.end()) {
+ // Add new one
+ BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size();
+ BlockDiffCandidates.push_back(
+ {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false});
+ return BlockDiffCandidates.back();
+ }
+ // Use existing one
+ BlockDiffCandidate &Result = BlockDiffCandidates[It->second];
+ assert(Result.RBB == RBB && "Inconsistent basic block pairing!");
+ return Result;
+ }
+
+ // Optionally passed to equivalence checker functions, so these can add
+ // assumptions in BlockDiffCandidates. Its presence controls whether
+ // assumptions are generated.
+ struct AssumptionContext {
+ // The two basic blocks that need the two compared values to be equivalent.
+ const BasicBlock *LBB;
+ const BasicBlock *RBB;
+ };
+
unsigned getUnprocPredCount(const BasicBlock *Block) const {
unsigned Count = 0;
for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
void unify(const Instruction *L, const Instruction *R) {
DifferenceEngine::Context C(Engine, L, R);
- bool Result = diff(L, R, true, true);
+ bool Result = diff(L, R, true, true, true);
assert(!Result && "structural differences second time around?");
(void) Result;
if (!L->use_empty())
}
}
+ void checkAndReportDiffCandidates() {
+ for (BlockDiffCandidate &BDC : BlockDiffCandidates) {
+
+ // Check assumptions
+ for (const auto &[L, R] : BDC.EquivalenceAssumptions) {
+ auto It = Values.find(L);
+ if (It == Values.end() || It->second != R) {
+ BDC.KnownToDiffer = true;
+ break;
+ }
+ }
+
+ // Run block diff if the BBs differ
+ if (BDC.KnownToDiffer) {
+ DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB);
+ runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin());
+ }
+ }
+ }
+
void diff(const BasicBlock *L, const BasicBlock *R) {
DifferenceEngine::Context C(Engine, L, R);
// If the instructions differ, start the more sophisticated diff
// algorithm at the start of the block.
- if (diff(LeftI, RightI, false, false)) {
+ if (diff(LeftI, RightI, false, false, true)) {
TentativeValues.clear();
- return runBlockDiff(L->begin(), R->begin());
+ // Register (L, R) as diffing pair. Note that we could directly emit a
+ // block diff here, but this way we ensure all diffs are emitted in one
+ // consistent order, independent of whether the diffs were detected
+ // immediately or via invalid assumptions.
+ getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true;
+ return;
}
// Otherwise, tentatively unify them.
bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
// FIXME: call attributes
- if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
+ AssumptionContext AC = {L.getParent(), R.getParent()};
+ if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(),
+ &AC)) {
if (Complain) Engine.log("called functions differ");
return true;
}
return true;
}
for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
- if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
+ if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) {
if (Complain)
Engine.logf("arguments %l and %r differ")
<< L.getArgOperand(I) << R.getArgOperand(I);
return false;
}
+ // If AllowAssumptions is enabled, whenever we encounter a pair of values
+ // that we cannot prove to be equivalent, we assume equivalence and store that
+ // assumption to be checked later in BlockDiffCandidates.
bool diff(const Instruction *L, const Instruction *R, bool Complain,
- bool TryUnify) {
+ bool TryUnify, bool AllowAssumptions) {
// FIXME: metadata (if Complain is set)
+ AssumptionContext ACValue = {L->getParent(), R->getParent()};
+ // nullptr AssumptionContext disables assumption generation.
+ const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr;
// Different opcodes always imply different operations.
if (L->getOpcode() != R->getOpcode()) {
tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
if (!equivalentAsOperands(LI.getIncomingValue(I),
- RI.getIncomingValue(I))) {
+ RI.getIncomingValue(I), AC)) {
if (Complain)
Engine.log("PHI node incoming values differ");
return true;
}
if (LI->isConditional()) {
- if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
if (Complain) Engine.log("branch conditions differ");
return true;
}
return true;
}
- if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
+ if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) {
if (Complain) Engine.log("indirectbr addresses differ");
return true;
}
} else if (isa<SwitchInst>(L)) {
const SwitchInst *LI = cast<SwitchInst>(L);
const SwitchInst *RI = cast<SwitchInst>(R);
- if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
if (Complain) Engine.log("switch conditions differ");
return true;
}
for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
Value *LO = L->getOperand(I), *RO = R->getOperand(I);
- if (!equivalentAsOperands(LO, RO)) {
+ if (!equivalentAsOperands(LO, RO, AC)) {
if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
return true;
}
}
public:
- bool equivalentAsOperands(const Constant *L, const Constant *R) {
+ bool equivalentAsOperands(const Constant *L, const Constant *R,
+ const AssumptionContext *AC) {
// Use equality as a preliminary filter.
if (L == R)
return true;
// Compare constant expressions structurally.
if (isa<ConstantExpr>(L))
- return equivalentAsOperands(cast<ConstantExpr>(L),
- cast<ConstantExpr>(R));
+ return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R),
+ AC);
// Constants of the "same type" don't always actually have the same
// type; I don't know why. Just white-list them.
if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
return false;
for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
- if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
+ if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC))
return false;
}
return true;
for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
if (!equivalentAsOperands(CAL->getAggregateElement(I),
- CAR->getAggregateElement(I)))
+ CAR->getAggregateElement(I), AC))
return false;
}
continue;
}
- if (!equivalentAsOperands(LAgg, RAgg)) {
+ if (!equivalentAsOperands(LAgg, RAgg, AC)) {
return false;
}
}
return false;
}
- bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
+ bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R,
+ const AssumptionContext *AC) {
if (L == R)
return true;
continue;
}
- if (!equivalentAsOperands(LOp, ROp))
+ if (!equivalentAsOperands(LOp, ROp, AC))
return false;
}
return true;
}
- bool equivalentAsOperands(const Value *L, const Value *R) {
+ // There are cases where we cannot determine whether two values are
+ // equivalent, because it depends on not yet processed basic blocks -- see the
+ // documentation on assumptions.
+ //
+ // AC is the context in which we are currently performing a diff.
+ // When we encounter a pair of values for which we can neither prove
+ // equivalence nor the opposite, we do the following:
+ // * If AC is nullptr, we treat the pair as non-equivalent.
+ // * If AC is set, we add an assumption for the basic blocks given by AC,
+ // and treat the pair as equivalent. The assumption is checked later.
+ bool equivalentAsOperands(const Value *L, const Value *R,
+ const AssumptionContext *AC) {
// Fall out if the values have different kind.
// This possibly shouldn't take priority over oracles.
if (L->getValueID() != R->getValueID())
// InlineAsm, MDNode, MDString, PseudoSourceValue
if (isa<Constant>(L))
- return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
+ return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC);
- if (isa<Instruction>(L))
- return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
+ if (isa<Instruction>(L)) {
+ auto It = Values.find(L);
+ if (It != Values.end())
+ return It->second == R;
+
+ if (TentativeValues.count(std::make_pair(L, R)))
+ return true;
+
+ // L and R might be equivalent, this could depend on not yet processed
+ // basic blocks, so we cannot decide here.
+ if (AC) {
+ // Add an assumption, unless there is a conflict with an existing one
+ BlockDiffCandidate &BDC =
+ getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB);
+ auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R});
+ if (!InsertionResult.second && InsertionResult.first->second != R) {
+ // We already have a conflicting equivalence assumption for L, so at
+ // least one must be wrong, and we know that there is a diff.
+ BDC.KnownToDiffer = true;
+ BDC.EquivalenceAssumptions.clear();
+ return false;
+ }
+ // Optimistically assume equivalence, and check later once all BBs
+ // have been processed.
+ return true;
+ }
+
+ // Assumptions disabled, so pessimistically assume non-equivalence.
+ return false;
+ }
if (isa<Argument>(L))
return Values[L] == R;
Queue(QueueSorter(*this_())) {}
void diff(const Function *L, const Function *R) {
+ assert(Values.empty() && "Multiple diffs per engine are not supported!");
+
if (L->arg_size() != R->arg_size())
Engine.log("different argument counts");
tryUnify(&*L->begin(), &*R->begin());
processQueue();
+ checkAndReportDiffCandidates();
}
};
bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
const Instruction *R) {
- return !diff(L, R, false, false);
+ return !diff(L, R, false, false, false);
}
void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
const CallInst *LCall = cast<CallInst>(&*I);
const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
if (!equivalentAsOperands(LCall->getCalledOperand(),
- RInvoke->getCalledOperand()))
+ RInvoke->getCalledOperand(), nullptr))
return;
if (!LCall->use_empty())
Values[LCall] = RInvoke;
const CallInst *RCall = cast<CallInst>(I);
const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
if (!equivalentAsOperands(LInvoke->getCalledOperand(),
- RCall->getCalledOperand()))
+ RCall->getCalledOperand(), nullptr))
return;
if (!LInvoke->use_empty())
Values[LInvoke] = RCall;
if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
return FunctionDifferenceEngine(*this, GVL, GVR)
- .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
+ .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(),
+ nullptr);
}
return L->getName() == R->getName();