// or comparison predicate. These are used to create a hash to map instructions
// to integers to be used in similarity matching in sequences of instructions
//
+// Terminology:
+// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
+// Instructions), usually used to denote a region of similarity has been found.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
InstructionClassification InstClassifier;
};
+/// This is a class that wraps a range of IRInstructionData from one point to
+/// another in the vector of IRInstructionData, which is a region of the
+/// program. It is also responsible for defining the structure within this
+/// region of instructions.
+///
+/// The structure of a region is defined through a value numbering system
+/// assigned to each unique value in a region at the creation of the
+/// IRSimilarityCandidate.
+///
+/// For example, for each Instruction we add a mapping for each new
+/// value seen in that Instruction.
+/// IR: Mapping Added:
+/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
+/// %add2 = add i32 %a, %1 %add2 -> 4
+/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
+///
+/// We can compare IRSimilarityCandidates against one another.
+/// The \ref isSimilar function compares each IRInstructionData against one
+/// another and if we have the same sequences of IRInstructionData that would
+/// create the same hash, we have similar IRSimilarityCandidates.
+class IRSimilarityCandidate {
+private:
+ /// The start index of this IRSimilarityCandidate in the instruction list.
+ unsigned StartIdx = 0;
+
+ /// The number of instructions in this IRSimilarityCandidate.
+ unsigned Len = 0;
+
+ /// The first instruction in this IRSimilarityCandidate.
+ IRInstructionData *FirstInst = nullptr;
+
+ /// The last instruction in this IRSimilarityCandidate.
+ IRInstructionData *LastInst = nullptr;
+
+ /// Global Value Numbering structures
+ /// @{
+ /// Stores the mapping of the value to the number assigned to it in the
+ /// IRSimilarityCandidate.
+ DenseMap<Value *, unsigned> ValueToNumber;
+ /// Stores the mapping of the number to the value assigned this number.
+ DenseMap<unsigned, Value *> NumberToValue;
+ /// @}
+
+public:
+ /// \param StartIdx - The starting location of the region.
+ /// \param StartIdx - The length of the region.
+ /// \param FirstInstIt - The starting IRInstructionData of the region.
+ /// \param LastInstIt - The ending IRInstructionData of the region.
+ IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+ IRInstructionData *FirstInstIt,
+ IRInstructionData *LastInstIt);
+
+ /// \param A - The first IRInstructionCandidate to compare.
+ /// \param B - The second IRInstructionCandidate to compare.
+ /// \returns True when every IRInstructionData in \p A is similar to every
+ /// IRInstructionData in \p B.
+ static bool isSimilar(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+ /// Compare the start and end indices of the two IRSimilarityCandidates for
+ /// whether they overlap. If the start instruction of one
+ /// IRSimilarityCandidate is less than the end instruction of the other, and
+ /// the start instruction of one is greater than the start instruction of the
+ /// other, they overlap.
+ ///
+ /// \returns true if the IRSimilarityCandidates do not have overlapping
+ /// instructions.
+ static bool overlap(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B);
+
+ /// \returns the number of instructions in this Candidate.
+ unsigned getLength() const { return Len; }
+
+ /// \returns the start index of this IRSimilarityCandidate.
+ unsigned getStartIdx() const { return StartIdx; }
+
+ /// \returns the end index of this IRSimilarityCandidate.
+ unsigned getEndIdx() const { return StartIdx + Len - 1; }
+
+ /// \returns The first IRInstructionData.
+ IRInstructionData *front() const { return FirstInst; }
+ /// \returns The last IRInstructionData.
+ IRInstructionData *back() const { return LastInst; }
+
+ /// \returns The first Instruction.
+ Instruction *frontInstruction() { return FirstInst->Inst; }
+ /// \returns The last Instruction
+ Instruction *backInstruction() { return LastInst->Inst; }
+
+ /// \returns The BasicBlock the IRSimilarityCandidate starts in.
+ BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
+ /// \returns The BasicBlock the IRSimilarityCandidate ends in.
+ BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
+
+ /// \returns The Function that the IRSimilarityCandidate is located in.
+ Function *getFunction() { return getStartBB()->getParent(); }
+
+ /// Finds the positive number associated with \p V if it has been mapped.
+ /// \param [in] V - the Value to find.
+ /// \returns The positive number corresponding to the value.
+ /// \returns None if not present.
+ Optional<unsigned> getGVN(Value *V) {
+ assert(V != nullptr && "Value is a nullptr?");
+ DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
+ if (VNIt == ValueToNumber.end())
+ return None;
+ return VNIt->second;
+ }
+
+ /// Finds the Value associate with \p Num if it exists.
+ /// \param [in] Num - the number to find.
+ /// \returns The Value associated with the number.
+ /// \returns None if not present.
+ Optional<Value *> fromGVN(unsigned Num) {
+ DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
+ if (VNIt == NumberToValue.end())
+ return None;
+ assert(VNIt->second != nullptr && "Found value is a nullptr!");
+ return VNIt->second;
+ }
+
+ /// \param RHS -The IRSimilarityCandidate to compare against
+ /// \returns true if the IRSimilarityCandidate is occurs after the
+ /// IRSimilarityCandidate in the program.
+ bool operator<(const IRSimilarityCandidate &RHS) const {
+ return getStartIdx() > RHS.getStartIdx();
+ }
+
+ using iterator = IRInstructionDataList::iterator;
+ iterator begin() const { return iterator(front()); }
+ iterator end() const { return std::next(iterator(back())); }
+};
} // end namespace IRSimilarity
} // end namespace llvm
return INumber;
}
+
+IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
+ IRInstructionData *FirstInstIt,
+ IRInstructionData *LastInstIt)
+ : StartIdx(StartIdx), Len(Len) {
+
+ assert(FirstInstIt != nullptr && "Instruction is nullptr!");
+ assert(LastInstIt != nullptr && "Instruction is nullptr!");
+ assert(StartIdx + Len > StartIdx &&
+ "Overflow for IRSimilarityCandidate range?");
+ assert(Len - 1 ==
+ std::distance(iterator(FirstInstIt), iterator(LastInstIt)) &&
+ "Length of the first and last IRInstructionData do not match the "
+ "given length");
+
+ // We iterate over the given instructions, and map each unique value
+ // to a unique number in the IRSimilarityCandidate ValueToNumber and
+ // NumberToValue maps. A constant get its own value globally, the individual
+ // uses of the constants are not considered to be unique.
+ //
+ // IR: Mapping Added:
+ // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
+ // %add2 = add i32 %a, %1 %add2 -> 4
+ // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
+ //
+ // when replace with global values, starting from 1, would be
+ //
+ // 3 = add i32 1, 2
+ // 4 = add i32 1, 3
+ // 6 = add i32 5, 2
+ unsigned LocalValNumber = 1;
+ IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
+ for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
+ // Map the operand values to an unsigned integer if it does not already
+ // have an unsigned integer assigned to it.
+ for (Value *Arg : ID->OperVals)
+ if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
+ ValueToNumber.try_emplace(Arg, LocalValNumber);
+ NumberToValue.try_emplace(LocalValNumber, Arg);
+ LocalValNumber++;
+ }
+
+ // Mapping the instructions to an unsigned integer if it is not already
+ // exist in the mapping.
+ if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
+ ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
+ NumberToValue.try_emplace(LocalValNumber, ID->Inst);
+ LocalValNumber++;
+ }
+ }
+
+ // Setting the first and last instruction data pointers for the candidate. If
+ // we got through the entire for loop without hitting an assert, we know
+ // that both of these instructions are not nullptrs.
+ FirstInst = FirstInstIt;
+ LastInst = LastInstIt;
+}
+
+bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ if (A.getLength() != B.getLength())
+ return false;
+
+ auto InstrDataForBoth =
+ zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
+
+ return all_of(InstrDataForBoth,
+ [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
+ IRInstructionData &A = std::get<0>(R);
+ IRInstructionData &B = std::get<1>(R);
+ if (!A.Legal || !B.Legal)
+ return false;
+ return isClose(A, B);
+ });
+}
+
+bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B) {
+ auto DoesOverlap = [](const IRSimilarityCandidate &X,
+ const IRSimilarityCandidate &Y) {
+ // Check:
+ // XXXXXX X starts before Y ends
+ // YYYYYYY Y starts after X starts
+ return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
+ };
+
+ return DoesOverlap(A, B) || DoesOverlap(B, A);
+}
// Make sure that the unsigned vector is the expected size.
ASSERT_TRUE(UnsignedVec.size() == 6);
}
+
+// A helper function that accepts an instruction list from a module made up of
+// two blocks of two legal instructions and terminator, and checks them for
+// instruction similarity.
+static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
+
+ std::vector<IRInstructionData *>::iterator Start, End;
+
+ Start = InstrList.begin();
+ End = InstrList.begin();
+
+ std::advance(End, 1);
+ IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+
+ Start = InstrList.begin();
+ End = InstrList.begin();
+
+ std::advance(Start, 3);
+ std::advance(End, 4);
+ IRSimilarityCandidate Cand2(3, 2, *Start, *End);
+ return IRSimilarityCandidate::isSimilar(Cand1, Cand2);
+}
+
+// Checks that two adds with commuted operands are considered to be the same
+// instructions.
+TEST(IRSimilarityCandidate, CheckIdenticalInstructions) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %b, %a
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ getVectors(*M, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(3));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ std::vector<IRInstructionData *>::iterator Start, End;
+ Start = InstrList.begin();
+ End = InstrList.begin();
+ std::advance(End, 1);
+ IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+ IRSimilarityCandidate Cand2(0, 2, *Start, *End);
+
+ ASSERT_TRUE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
+}
+
+// Checks that IRSimilarityCandidates wrapping these two regions of instructions
+// are able to differentiate between instructions that have different opcodes.
+TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = sub i32 %a, %b
+ %3 = add i32 %b, %a
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ getVectors(*M, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_FALSE(longSimCandCompare(InstrList));
+}
+
+// Checks that IRSimilarityCandidates wrapping these two regions of instructions
+// are able to differentiate between instructions that have different types.
+TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = add i64 %c, %d
+ %3 = add i64 %d, %c
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ getVectors(*M, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_FALSE(longSimCandCompare(InstrList));
+}
+
+// Check that debug instructions do not impact similarity. They are marked as
+// invisible.
+TEST(IRSimilarityCandidate, IdenticalWithDebug) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ call void @llvm.dbg.value(metadata !0)
+ %1 = add i32 %b, %a
+ ret i32 0
+ bb1:
+ %2 = add i32 %a, %b
+ call void @llvm.dbg.value(metadata !1)
+ %3 = add i32 %b, %a
+ ret i32 0
+ bb2:
+ %4 = add i32 %a, %b
+ %5 = add i32 %b, %a
+ ret i32 0
+ }
+
+ declare void @llvm.dbg.value(metadata)
+ !0 = distinct !{!"test\00", i32 10}
+ !1 = distinct !{!"test\00", i32 11})";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ getVectors(*M, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(9));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ ASSERT_TRUE(longSimCandCompare(InstrList));
+}
+
+// Checks that IRSimilarityCandidates that include illegal instructions, are not
+// considered to be the same set of instructions. In these sets of instructions
+// the allocas are illegal.
+TEST(IRSimilarityCandidate, IllegalInCandidate) {
+ StringRef ModuleString = R"(
+ define i32 @f(i32 %a, i32 %b) {
+ bb0:
+ %0 = add i32 %a, %b
+ %1 = add i32 %a, %b
+ %2 = alloca i32
+ ret i32 0
+ bb1:
+ %3 = add i32 %a, %b
+ %4 = add i32 %a, %b
+ %5 = alloca i32
+ ret i32 0
+ })";
+ LLVMContext Context;
+ std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+ std::vector<IRInstructionData *> InstrList;
+ std::vector<unsigned> UnsignedVec;
+
+ getVectors(*M, InstrList, UnsignedVec);
+
+ // Check to make sure that we have a long enough region.
+ ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+ // Check that the instructions were added correctly to both vectors.
+ ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+ std::vector<IRInstructionData *>::iterator Start, End;
+
+ Start = InstrList.begin();
+ End = InstrList.begin();
+
+ std::advance(End, 2);
+ IRSimilarityCandidate Cand1(0, 3, *Start, *End);
+
+ Start = InstrList.begin();
+ End = InstrList.begin();
+
+ std::advance(Start, 3);
+ std::advance(End, 5);
+ IRSimilarityCandidate Cand2(3, 3, *Start, *End);
+ ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
+}