/// The \ref isSimilar function compares each IRInstructionData against one
/// another and if we have the same sequences of IRInstructionData that would
/// create the same hash, we have similar IRSimilarityCandidates.
+///
+/// We can also compare the structure of IRSimilarityCandidates. If we can
+/// create a mapping of registers in the region contained by one
+/// IRSimilarityCandidate to the region contained by different
+/// IRSimilarityCandidate, they can be considered structurally similar.
+///
+/// IRSimilarityCandidate1: IRSimilarityCandidate2:
+/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
+/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
+/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
+///
+/// Can have the following mapping from candidate to candidate of:
+/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
+/// and can be considered similar.
+///
+/// IRSimilarityCandidate1: IRSimilarityCandidate2:
+/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
+/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
+/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
+///
+/// We cannot create the same mapping since the use of c4 is not used in the
+/// same way as %b or c2.
class IRSimilarityCandidate {
private:
/// The start index of this IRSimilarityCandidate in the instruction list.
/// IRInstructionData in \p B.
static bool isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
+
+ /// \param A - The first IRInstructionCandidate to compare.
+ /// \param B - The second IRInstructionCandidate to compare.
+ /// \returns True when every IRInstructionData in \p A is structurally similar
+ /// to \p B.
+ static bool compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+
+ struct OperandMapping {
+ /// The IRSimilarityCandidate that holds the instruction the OperVals were
+ /// pulled from.
+ const IRSimilarityCandidate &IRSC;
+
+ /// The operand values to be analyzed.
+ ArrayRef<Value *> &OperVals;
+
+ /// The current mapping of global value numbers from one IRSimilarityCandidate
+ /// to another IRSimilarityCandidate.
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
+ };
+
+ /// Compare the operands in \p A and \p B and check that the current mapping
+ /// of global value numbers from \p A to \p B and \p B to \A is consistent.
+ ///
+ /// \param A - The first IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \param B - The second IRInstructionCandidate, operand values, and current
+ /// operand mappings to compare.
+ /// \returns true if the IRSimilarityCandidates operands are compatible.
+ static bool compareOperandMapping(OperandMapping A, OperandMapping B);
+
/// Compare the start and end indices of the two IRSimilarityCandidates for
/// whether they overlap. If the start instruction of one
/// IRSimilarityCandidate is less than the end instruction of the other, and
iterator begin() const { return iterator(front()); }
iterator end() const { return std::next(iterator(back())); }
};
+
} // end namespace IRSimilarity
} // end namespace llvm
});
}
+/// Determine if operand number \p TargetArgVal is in the current mapping set
+/// for operand number \p SourceArgVal.
+///
+/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
+/// value numbers from source IRSimilarityCandidate to target
+/// IRSimilarityCandidate.
+/// \param [in] SourceArgVal The global value number for an operand in the
+/// in the original candidate.
+/// \param [in] TargetArgVal The global value number for the corresponding
+/// operand in the other candidate.
+/// \returns True if there exists a mapping and false if not.
+bool checkNumberingAndReplace(
+ DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
+ unsigned SourceArgVal, unsigned TargetArgVal) {
+ // We are given two unsigned integers representing the global values of
+ // the operands in different IRSimilarityCandidates and a current mapping
+ // between the two.
+ //
+ // Source Operand GVN: 1
+ // Target Operand GVN: 2
+ // CurrentMapping: {1: {1, 2}}
+ //
+ // Since we have mapping, and the target operand is contained in the set, we
+ // update it to:
+ // CurrentMapping: {1: {2}}
+ // and can return true. But, if the mapping was
+ // CurrentMapping: {1: {3}}
+ // we would return false.
+
+ bool WasInserted;
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
+
+ std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
+ std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
+
+ // If we created a new mapping, then we are done.
+ if (WasInserted)
+ return true;
+
+ // If there is more than one option in the mapping set, and the target value
+ // is included in the mapping set replace that set with one that only includes
+ // the target value, as it is the only valid mapping via the non commutative
+ // instruction.
+
+ DenseSet<unsigned> &TargetSet = Val->second;
+ if (TargetSet.size() > 1 && TargetSet.find(TargetArgVal) != TargetSet.end()) {
+ TargetSet.clear();
+ TargetSet.insert(TargetArgVal);
+ return true;
+ }
+
+ // Return true if we can find the value in the set.
+ return TargetSet.find(TargetArgVal) != TargetSet.end();
+}
+
+bool IRSimilarityCandidate::compareOperandMapping(OperandMapping A,
+ OperandMapping B) {
+ // Iterators to keep track of where we are in the operands for each
+ // Instruction.
+ ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
+ ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
+ unsigned OperandLength = A.OperVals.size();
+
+ // For each operand, get the value numbering and ensure it is consistent.
+ for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
+ unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
+ unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
+
+ // Attempt to add a set with only the target value. If there is no mapping
+ // we can create it here.
+ //
+ // For an instruction like a subtraction:
+ // IRSimilarityCandidateA: IRSimilarityCandidateB:
+ // %resultA = sub %a, %b %resultB = sub %d, %e
+ //
+ // We map %a -> %d and %b -> %e.
+ //
+ // And check to see whether their mapping is consistent in
+ // checkNumberingAndReplace.
+
+ if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
+ return false;
+
+ if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
+ return false;
+ }
+ return true;
+}
+
+bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ if (A.getLength() != B.getLength())
+ return false;
+
+ if (A.ValueToNumber.size() != B.ValueToNumber.size())
+ return false;
+
+ iterator ItA = A.begin();
+ iterator ItB = B.begin();
+
+ // These sets create a create a mapping between the values in one candidate
+ // to values in the other candidate. If we create a set with one element,
+ // and that same element maps to the original element in the candidate
+ // we have a good mapping.
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
+
+ bool WasInserted;
+
+ // Iterate over the instructions contained in each candidate
+ unsigned SectionLength = A.getStartIdx() + A.getLength();
+ for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
+ ItA++, ItB++, Loc++) {
+ // Make sure the instructions are similar to one another.
+ if (!isClose(*ItA, *ItB))
+ return false;
+
+ Instruction *IA = ItA->Inst;
+ Instruction *IB = ItB->Inst;
+
+ if (!ItA->Legal || !ItB->Legal)
+ return false;
+
+ // Get the operand sets for the instructions.
+ ArrayRef<Value *> OperValsA = ItA->OperVals;
+ ArrayRef<Value *> OperValsB = ItB->OperVals;
+
+ unsigned InstValA = A.ValueToNumber.find(IA)->second;
+ unsigned InstValB = B.ValueToNumber.find(IB)->second;
+
+ // Ensure that the mappings for the instructions exists.
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
+ std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
+ if (!WasInserted && ValueMappingIt->second.find(InstValB) ==
+ ValueMappingIt->second.end())
+ return false;
+
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
+ std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
+ if (!WasInserted && ValueMappingIt->second.find(InstValA) ==
+ ValueMappingIt->second.end())
+ return false;
+
+ // TODO: Handle commutative instructions by mapping one operand to many
+ // operands instead only mapping a single operand to a single operand.
+ if (!compareOperandMapping({A, OperValsA, ValueNumberMappingA},
+ {B, OperValsB, ValueNumberMappingB}))
+ return false;
+ }
+
+ return true;
+}
+
bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
auto DoesOverlap = [](const IRSimilarityCandidate &X,
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
using namespace llvm;
using namespace IRSimilarity;
// A helper function that accepts an instruction list from a module made up of
// two blocks of two legal instructions and terminator, and checks them for
// instruction similarity.
-static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
+static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList,
+ bool Structure = false) {
std::vector<IRInstructionData *>::iterator Start, End;
Start = InstrList.begin();
std::advance(Start, 3);
std::advance(End, 4);
IRSimilarityCandidate Cand2(3, 2, *Start, *End);
+ if (Structure)
+ return IRSimilarityCandidate::compareStructure(Cand1, Cand2);
return IRSimilarityCandidate::isSimilar(Cand1, Cand2);
}
IRSimilarityCandidate Cand2(3, 3, *Start, *End);
ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
}
+
+// Checks that different structure, in this case, where we introduce a new
+// needed input in one region, is recognized as different.
+TEST(IRSimilarityCandidate, DifferentStructure) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = add i32 %a, %b
+ %3 = add i32 %b, %0
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+ SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+ IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+ getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_FALSE(longSimCandCompare(InstrList, true));
+}
+
+// Checks that the same structure is recognized between two candidates. The
+// items %a and %b are used in the same way in both sets of instructions.
+TEST(IRSimilarityCandidate, SameStructure) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = sub i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = add i32 %a, %b
+ %3 = sub i32 %b, %a
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+ SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+ IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+ getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_TRUE(longSimCandCompare(InstrList, true));
+}
+
+// Checks that the same structure is recognized between two candidates. While
+// the input names are reversed, they still perform the same overall operation.
+TEST(IRSimilarityCandidate, DifferentNameSameStructure) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = add i32 %b, %a
+ %3 = add i32 %a, %b
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+ SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+ IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+ getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_TRUE(longSimCandCompare(InstrList, true));
+}