From: Andrew Litteken Date: Thu, 17 Sep 2020 01:24:29 +0000 (-0500) Subject: [IRSim] Adding IR Instruction Mapper X-Git-Tag: llvmorg-13-init~11798 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b04c1a9d3127730c05e8a22a0e931a12a39528df;p=platform%2Fupstream%2Fllvm.git [IRSim] Adding IR Instruction Mapper This introduces the IRInstructionMapper, and the associated wrapper for instructions, IRInstructionData, that maps IR level Instructions to unsigned integers. Mapping is done mainly by using the "isSameOperationAs" comparison between two instructions. If they return true, the opcode, result type, and operand types of the instruction are used to hash the instruction with an unsigned integer. The mapper accepts instruction ranges, and adds each resulting integer to a list, and each wrapped instruction to a separate list. At present, branches, phi nodes are not mapping and exception handling is illegal. Debug instructions are not considered. The different mapping schemes are tested in unittests/Analysis/IRSimilarityIdentifierTest.cpp Differential Revision: https://reviews.llvm.org/D86968 --- diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h new file mode 100644 index 0000000..9e6d3ae --- /dev/null +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -0,0 +1,357 @@ +//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Interface file for the IRSimilarityIdentifier for identifying similarities in +// IR including the IRInstructionMapper, which maps an Instruction to unsigned +// integers. +// +// Two sequences of instructions are called "similar" if they perform the same +// series of operations for all inputs. +// +// \code +// %1 = add i32 %a, 10 +// %2 = add i32 %a, %1 +// %3 = icmp slt icmp %1, %2 +// \endcode +// +// and +// +// \code +// %1 = add i32 11, %a +// %2 = sub i32 %a, %1 +// %3 = icmp sgt icmp %2, %1 +// \endcode +// +// ultimately have the same result, even if the inputs, and structure are +// slightly different. +// +// For instructions, we do not worry about operands that do not have fixed +// semantic meaning to the program. We consider the opcode that the instruction +// has, the types, parameters, and extra information such as the function name, +// or comparison predicate. These are used to create a hash to map instructions +// to integers to be used in similarity matching in sequences of instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H +#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H + +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { +namespace IRSimilarity { + +/// This represents what is and is not supported when finding similarity in +/// Instructions. +/// +/// Legal Instructions are considered when looking at similarity between +/// Instructions. +/// +/// Illegal Instructions cannot be considered when looking for similarity +/// between Instructions. They act as boundaries between similarity regions. +/// +/// Invisible Instructions are skipped over during analysis. +// TODO: Shared with MachineOutliner +enum InstrType { Legal, Illegal, Invisible }; + +/// This provides the utilities for hashing an Instruction to an unsigned +/// integer. Two IRInstructionDatas produce the same hash value when their +/// underlying Instructions perform the same operation (even if they don't have +/// the same input operands.) +/// As a more concrete example, consider the following: +/// +/// \code +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// \endcode +/// +// Then the IRInstructionData wrappers for these Instructions may be hashed like +/// so: +/// +/// \code +/// ; These two adds have the same types and operand types, so they hash to the +/// ; same number. +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d ; Hash: 1 +/// ; This add produces an i64. This differentiates it from %add1 and %add2. So, +/// ; it hashes to a different number. +/// %add3 = add i64 %e, %f; Hash: 2 +/// \endcode +/// +/// +/// This hashing scheme will be used to represent the program as a very long +/// string. This string can then be placed in a data structure which can be used +/// for similarity queries. +/// +/// TODO: Handle types of Instructions which can be equal even with different +/// operands. (E.g. comparisons with swapped predicates.) +/// TODO: Handle CallInsts, which are only checked for function type +/// by \ref isSameOperationAs. +/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the +/// exact same, and some do not. +struct IRInstructionData : ilist_node { + + /// The source Instruction that is being wrapped. + Instruction *Inst = nullptr; + /// The values of the operands in the Instruction. + SmallVector OperVals; + /// The legality of the wrapped instruction. This is informed by InstrType, + /// and is used when checking when two instructions are considered similar. + /// If either instruction is not legal, the instructions are automatically not + /// considered similar. + bool Legal; + + /// Gather the information that is difficult to gather for an Instruction, or + /// is changed. i.e. the operands of an Instruction and the Types of those + /// operands. This extra information allows for similarity matching to make + /// assertions that allow for more flexibility when checking for whether an + /// Instruction performs the same operation. + IRInstructionData(Instruction &I, bool Legality); + + /// Hashes \p Value based on its opcode, types, and operand types. + /// Two IRInstructionData instances produce the same hash when they perform + /// the same operation. + /// + /// As a simple example, consider the following instructions. + /// + /// \code + /// %add1 = add i32 %x1, %y1 + /// %add2 = add i32 %x2, %y2 + /// + /// %sub = sub i32 %x1, %y1 + /// + /// %add_i64 = add i64 %x2, %y2 + /// \endcode + /// + /// Because the first two adds operate the same types, and are performing the + /// same action, they will be hashed to the same value. + /// + /// However, the subtraction instruction is not the same as an addition, and + /// will be hashed to a different value. + /// + /// Finally, the last add has a different type compared to the first two add + /// instructions, so it will also be hashed to a different value that any of + /// the previous instructions. + /// + /// \param [in] Value - The IRInstructionData instance to be hashed. + /// \returns A hash_value of the IRInstructionData. + friend hash_code hash_value(const IRInstructionData &ID) { + SmallVector OperTypes; + for (Value *V : ID.OperVals) + OperTypes.push_back(V->getType()); + + return hash_combine( + hash_value(ID.Inst->getOpcode()), hash_value(ID.Inst->getType()), + hash_combine_range(OperTypes.begin(), OperTypes.end())); + } +}; + +/// Compare one IRInstructionData class to another IRInstructionData class for +/// whether they are performing a the same operation, and can mapped to the +/// same value. For regular instructions if the hash value is the same, then +/// they will also be close. +/// +/// \param A - The first IRInstructionData class to compare +/// \param B - The second IRInstructionData class to compare +/// \returns true if \p A and \p B are similar enough to be mapped to the same +/// value. +bool isClose(const IRInstructionData &A, const IRInstructionData &B); + +struct IRInstructionDataTraits : DenseMapInfo { + static inline IRInstructionData *getEmptyKey() { return nullptr; } + static inline IRInstructionData *getTombstoneKey() { + return reinterpret_cast(-1); + } + + static unsigned getHashValue(const IRInstructionData *E) { + using llvm::hash_value; + assert(E && "IRInstructionData is a nullptr?"); + return hash_value(*E); + } + + static bool isEqual(const IRInstructionData *LHS, + const IRInstructionData *RHS) { + if (RHS == getEmptyKey() || RHS == getTombstoneKey() || + LHS == getEmptyKey() || LHS == getTombstoneKey()) + return LHS == RHS; + + assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); + return isClose(*LHS, *RHS); + } +}; + +/// Helper struct for converting the Instructions in a Module into a vector of +/// unsigned integers. This vector of unsigned integers can be thought of as a +/// "numeric string". This numeric string can then be queried by, for example, +/// data structures that find repeated substrings. +/// +/// This hashing is done per BasicBlock in the module. To hash Instructions +/// based off of their operations, each Instruction is wrapped in an +/// IRInstructionData struct. The unsigned integer for an IRInstructionData +/// depends on: +/// - The hash provided by the IRInstructionData. +/// - Which member of InstrType the IRInstructionData is classified as. +// See InstrType for more details on the possible classifications, and how they +// manifest in the numeric string. +/// +/// The numeric string for an individual BasicBlock is terminated by an unique +/// unsigned integer. This prevents data structures which rely on repetition +/// from matching across BasicBlocks. (For example, the SuffixTree.) +/// As a concrete example, if we have the following two BasicBlocks: +/// \code +/// bb0: +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// bb1: +/// %sub = sub i32 %c, %d +/// \endcode +/// We may hash the Instructions like this (via IRInstructionData): +/// \code +/// bb0: +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d; Hash: 1 +/// %add3 = add i64 %e, %f; Hash: 2 +/// bb1: +/// %sub = sub i32 %c, %d; Hash: 3 +/// %add4 = add i32 %c, %d ; Hash: 1 +/// \endcode +/// And produce a "numeric string representation" like so: +/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 +/// +/// TODO: This is very similar to the MachineOutliner, and should be +/// consolidated into the same interface. +struct IRInstructionMapper { + /// The starting illegal instruction number to map to. + /// + /// Set to -3 for compatibility with DenseMapInfo. + unsigned IllegalInstrNumber = static_cast(-3); + + /// The next available integer to assign to a legal Instruction to. + unsigned LegalInstrNumber = 0; + + /// Correspondence from IRInstructionData to unsigned integers. + DenseMap + InstructionIntegerMap; + + /// Set if we added an illegal number in the previous step. + /// Since each illegal number is unique, we only need one of them between + /// each range of legal numbers. This lets us make sure we don't add more + /// than one illegal number per range. + bool AddedIllegalLastTime = false; + + /// Marks whether we found a illegal instruction in the previous step. + bool CanCombineWithPrevInstr = false; + + /// Marks whether we have found a set of instructions that is long enough + /// to be considered for similarity. + bool HaveLegalRange = false; + + /// This allocator pointer is in charge of holding on to the IRInstructionData + /// so it is not deallocated until whatever external tool is using it is done + /// with the information. + BumpPtrAllocator *InstDataAllocator = nullptr; + + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers + /// determined by \p InstrType. Two Instructions are mapped to the same value + /// if they are close as defined by the InstructionData class above. + /// + /// \param [in] BB - The BasicBlock to be mapped to integers. + /// \param [in,out] InstrList - Vector of IRInstructionData to append to. + /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. + void convertToUnsignedVec(BasicBlock &BB, + std::vector &InstrList, + std::vector &IntegerMapping); + + /// Maps an Instruction to a legal integer. + /// + /// \param [in] It - The Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrList - Vector of InstructionData to append + /// to. \returns The integer \p It was mapped to. + unsigned mapToLegalUnsigned(BasicBlock::iterator &It, + std::vector &IntegerMappingForBB, + std::vector &InstrListForBB); + + /// Maps an Instruction to an illegal integer. + /// + /// \param [in] It - The \p Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrList - Vector of IRInstructionData to append to. + /// \param End - true if creating a dummy IRInstructionData at the end of a + /// basic block. + /// \returns The integer \p It was mapped to. + unsigned mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB, bool End = false); + + IRInstructionMapper(BumpPtrAllocator *IDA) : InstDataAllocator(IDA) { + // Make sure that the implementation of DenseMapInfo hasn't + // changed. + assert(DenseMapInfo::getEmptyKey() == static_cast(-1) && + "DenseMapInfo's empty key isn't -1!"); + assert(DenseMapInfo::getTombstoneKey() == + static_cast(-2) && + "DenseMapInfo's tombstone key isn't -2!"); + } + + /// Custom InstVisitor to classify different instructions for whether it can + /// be analyzed for similarity. + struct InstructionClassification + : public InstVisitor { + InstructionClassification() {} + + // TODO: Determine a scheme to resolve when the label is similar enough. + InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + // TODO: Determine a scheme to resolve when the labels are similar enough. + InstrType visitPHINode(PHINode &PN) { return Illegal; } + // TODO: Handle allocas. + InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } + // We exclude variable argument instructions since variable arguments + // requires extra checking of the argument list. + InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } + // We exclude all exception handling cases since they are so context + // dependent. + InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } + InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } + // DebugInfo should be included in the regions, but should not be + // analyzed for similarity as it has no bearing on the outcome of the + // program. + InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } + // TODO: Handle GetElementPtrInsts + InstrType visitGetElementPtrInst(GetElementPtrInst &GEPI) { + return Illegal; + } + // TODO: Handle specific intrinsics. + InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + // TODO: Handle CallInsts. + InstrType visitCallInst(CallInst &CI) { return Illegal; } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } + // TODO: Handle interblock similarity. + InstrType visitTerminator(Instruction &I) { return Illegal; } + InstrType visitInstruction(Instruction &I) { return Legal; } + }; + + /// Maps an Instruction to a member of InstrType. + InstructionClassification InstClassifier; +}; + +} // end namespace IRSimilarity +} // end namespace llvm + +#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 78cc764..4bd45ea 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_component_library(LLVMAnalysis GlobalsModRef.cpp GuardUtils.cpp HeatUtils.cpp + IRSimilarityIdentifier.cpp IVDescriptors.cpp IVUsers.cpp IndirectCallPromotionAnalysis.cpp diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp new file mode 100644 index 0000000..050f5b1 --- /dev/null +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -0,0 +1,153 @@ +//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Implementation file for the IRSimilarityIdentifier for identifying +// similarities in IR including the IRInstructionMapper. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/User.h" + +using namespace llvm; +using namespace IRSimilarity; + +IRInstructionData::IRInstructionData(Instruction &I, bool Legality) + : Inst(&I), Legal(Legality) { + // Here we collect the operands to be used to determine whether two + // instructions are similar to one another. + for (Use &OI : I.operands()) + OperVals.push_back(OI.get()); +} + +bool IRSimilarity::isClose(const IRInstructionData &A, + const IRInstructionData &B) { + return A.Legal && A.Inst->isSameOperationAs(B.Inst); +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +void IRInstructionMapper::convertToUnsignedVec( + BasicBlock &BB, std::vector &InstrList, + std::vector &IntegerMapping) { + BasicBlock::iterator It = BB.begin(); + + std::vector IntegerMappingForBB; + std::vector InstrListForBB; + + HaveLegalRange = false; + CanCombineWithPrevInstr = false; + AddedIllegalLastTime = true; + + for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { + switch (InstClassifier.visit(*It)) { + case InstrType::Legal: + mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Illegal: + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); + break; + case InstrType::Invisible: + AddedIllegalLastTime = false; + break; + } + } + + if (HaveLegalRange) { + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + InstrList.insert(InstrList.end(), InstrListForBB.begin(), + InstrListForBB.end()); + IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForBB.begin(), + IntegerMappingForBB.end()); + } +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToLegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB) { + // We added something legal, so we should unset the AddedLegalLastTime + // flag. + AddedIllegalLastTime = false; + + // If we have at least two adjacent legal instructions (which may have + // invisible instructions in between), remember that. + if (CanCombineWithPrevInstr) + HaveLegalRange = true; + CanCombineWithPrevInstr = true; + + // Get the integer for this instruction or give it the current + // LegalInstrNumber. + IRInstructionData *ID = new (InstDataAllocator->Allocate()) + IRInstructionData(*It, true); + InstrListForBB.push_back(ID); + + // Add to the instruction list + bool WasInserted; + DenseMap::iterator + ResultIt; + std::tie(ResultIt, WasInserted) = + InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); + unsigned INumber = ResultIt->second; + + // There was an insertion. + if (WasInserted) + LegalInstrNumber++; + + IntegerMappingForBB.push_back(INumber); + + // Make sure we don't overflow or use any integers reserved by the DenseMap. + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(LegalInstrNumber != DenseMapInfo::getEmptyKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + assert(LegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "Tried to assign DenseMap tombstone or empty key to instruction."); + + return INumber; +} + +// TODO: This is the same as the MachineOutliner, and should be consolidated +// into the same interface. +unsigned IRInstructionMapper::mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector &IntegerMappingForBB, + std::vector &InstrListForBB, bool End) { + // Can't combine an illegal instruction. Set the flag. + CanCombineWithPrevInstr = false; + + // Only add one illegal number per range of legal numbers. + if (AddedIllegalLastTime) + return IllegalInstrNumber; + + IRInstructionData *ID = nullptr; + if (!End) + ID = new (InstDataAllocator->Allocate()) + IRInstructionData(*It, false); + InstrListForBB.push_back(ID); + + // Remember that we added an illegal number last time. + AddedIllegalLastTime = true; + unsigned INumber = IllegalInstrNumber; + IntegerMappingForBB.push_back(IllegalInstrNumber--); + + assert(LegalInstrNumber < IllegalInstrNumber && + "Instruction mapping overflow!"); + + assert(IllegalInstrNumber != DenseMapInfo::getEmptyKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + assert(IllegalInstrNumber != DenseMapInfo::getTombstoneKey() && + "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); + + return INumber; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index dfe570f..0480649 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_unittest_with_input_files(AnalysisTests DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp FunctionPropertiesAnalysisTest.cpp + IRSimilarityIdentifierTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp LoadsTest.cpp diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp new file mode 100644 index 0000000..4cc81b2 --- /dev/null +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -0,0 +1,1177 @@ +//===- IRSimilarityIdentifierTest.cpp - IRSimilarityIdentifier unit tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Tests for components for finding similarity such as the instruction mapper, +// suffix tree usage, and structural analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace IRSimilarity; + +static std::unique_ptr makeLLVMModule(LLVMContext &Context, + StringRef ModuleStr) { + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(ModuleStr, Err, Context); + assert(M && "Bad LLVM IR?"); + return M; +} + +void getVectors(Module &M, std::vector &InstrList, + std::vector &UnsignedVec) { + BumpPtrAllocator InstDataAllocator; + IRInstructionMapper Mapper(&InstDataAllocator); + + for (Function &F : M) + for (BasicBlock &BB : F) + Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec); +} + +// Checks that different opcodes are mapped to different values. +TEST(IRInstructionMapper, OpcodeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = mul i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check that the size of the unsigned vector and the instruction list are the + // same as a safety check. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + // Make sure that the unsigned vector is the expected size. + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are not mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same opcodes and types are mapped to the same values. +TEST(IRInstructionMapper, OpcodeTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + + // Check whether the instructions are mapped to the same value. + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same opcode and different types are mapped to different +// values. +TEST(IRInstructionMapper, TypeDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) { + bb0: + %0 = add i32 %a, %b + %1 = add i64 %c, %d + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that different predicates map to different values. +TEST(IRInstructionMapper, PredicateDifferentiation) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sge i32 %b, %a + %1 = icmp slt i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that predicates with the same swapped predicate map to different +// values. +TEST(IRInstructionMapper, PredicateIsomorphism) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp sgt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value. +TEST(IRInstructionMapper, PredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp slt i32 %a, %b + %1 = icmp slt i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the same predicate maps to the same value for floating point +// CmpInsts. +TEST(IRInstructionMapper, FPPredicateSimilarity) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp olt double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the different predicate maps to a different value for floating +// point CmpInsts. +TEST(IRInstructionMapper, FPPredicatDifference) { + StringRef ModuleString = R"( + define i32 @f(double %a, double %b) { + bb0: + %0 = fcmp olt double %a, %b + %1 = fcmp oge double %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that the zexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, ZextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the sexts that have the same type parameters map to the same +// unsigned integer. +TEST(IRInstructionMapper, SextTypeSimilarity) { + StringRef ModuleString = R"( + define i32 @f(i32 %a) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i32 %a to i64 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that the zexts that have the different type parameters map to the +// different unsigned integers. +TEST(IRInstructionMapper, ZextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = zext i32 %a to i64 + %1 = zext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + + +// Checks that the sexts that have the different type parameters map to the +// different unsigned integers. +TEST(IRInstructionMapper, SextTypeDifference) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i8 %b) { + bb0: + %0 = sext i32 %a to i64 + %1 = sext i8 %b to i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, LoadSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + %0 = load i32, i32* %a + %1 = load i64, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different aligns are mapped to different +// unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load volatile i32, i32* %a + %1 = load volatile i32, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, LoadSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + %0 = load atomic i32, i32* %a unordered, align 4 + %1 = load atomic i32, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the same type are mapped to the same unsigned +// integer. +TEST(IRInstructionMapper, StoreSimilarType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a + store i32 2, i32* %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that stores that have the different types are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentType) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i64* %b) { + bb0: + store i32 1, i32* %a + store i64 1, i64* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different aligns are mapped to different +// unsigned integers. +TEST(IRInstructionMapper, StoreDifferentAlign) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store i32 1, i32* %a, align 4 + store i32 1, i32* %b, align 8 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the different volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// Checks that stores that have the same volatile settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameVolatile) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store volatile i32 1, i32* %a + store volatile i32 1, i32* %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the same atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreSameAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b unordered, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] == UnsignedVec[1]); +} + +// Checks that loads that have the different atomicity settings are mapped to +// different unsigned integers. +TEST(IRInstructionMapper, StoreDifferentAtomic) { + StringRef ModuleString = R"( + define i32 @f(i32* %a, i32* %b) { + bb0: + store atomic i32 1, i32* %a unordered, align 4 + store atomic i32 1, i32* %b monotonic, align 4 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + ASSERT_TRUE(UnsignedVec.size() == 3); + ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]); +} + +// In most cases, the illegal instructions we are collecting don't require any +// sort of setup. In these cases, we can just only have illegal instructions, +// and the mapper will create 0 length vectors, and we can check that. + +// In cases where we have legal instructions needed to set up the illegal +// instruction, to check illegal instructions are assigned unsigned integers +// from the maximum value decreasing to 0, it will be greater than a legal +// instruction that comes after. So to check that we have an illegal +// instruction, we place a legal instruction after an illegal instruction, and +// check that the illegal unsigned integer is greater than the unsigned integer +// of the legal instruction. + +// Checks that the branch is mapped to be illegal since there is extra checking +// needed to ensure that a branch in one region is branching to an isomorphic +// location in a different region. +TEST(IRInstructionMapper, BranchIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = icmp slt i32 %a, %b + br i1 %0, label %bb0, label %bb1 + bb1: + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a PHINode is mapped to be illegal since there is extra checking +// needed to ensure that a branch in one region is bin an isomorphic +// location in a different region. +TEST(IRInstructionMapper, PhiIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ] + ret i32 0 + bb1: + ret i32 1 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an alloca instruction is mapped to be illegal. +TEST(IRInstructionMapper, AllocaIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = alloca i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an getelementptr instruction is mapped to be illegal. There is +// extra checking required for the parameters if a getelementptr has more than +// two operands. +TEST(IRInstructionMapper, GetElementPtrIllegal) { + StringRef ModuleString = R"( + %struct.RT = type { i8, [10 x [20 x i32]], i8 } + %struct.ST = type { i32, double, %struct.RT } + define i32 @f(%struct.ST* %s, i32 %a, i32 %b) { + bb0: + %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a call instruction is mapped to be illegal. We have to perform +// extra checks to ensure that both the name and function type are the same. +TEST(IRInstructionMapper, CallIllegal) { + StringRef ModuleString = R"( + declare i32 @f1(i32, i32) + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = call i32 @f1(i32 %a, i32 %b) + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an invoke instruction is mapped to be illegal. Invoke +// instructions are considered to be illegal because of the change in the +// control flow that is currently not recognized. +TEST(IRInstructionMapper, InvokeIllegal) { + StringRef ModuleString = R"( + define i32 @f(i8 *%gep1, i32 %b) { + then: + invoke i32 undef(i8* undef) + to label %invoke unwind label %lpad + + invoke: + unreachable + + lpad: + landingpad { i8*, i32 } + catch i8* null + unreachable + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an callbr instructions are considered to be illegal. Callbr +// instructions are considered to be illegal because of the change in the +// control flow that is currently not recognized. +TEST(IRInstructionMapper, CallBrInstIllegal) { + StringRef ModuleString = R"( + define void @test() { + fail: + ret void + } + + define i32 @f(i32 %a, i32 %b) { + bb0: + callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %a, i8* blockaddress(@test, %fail)) to label %normal [label %fail] + fail: + ret i32 0 + normal: + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an debuginfo intrinsics are mapped to be invisible. Since they +// do not semantically change the program, they can be recognized as similar. +TEST(IRInstructionMapper, DebugInfoInvisible) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + then: + %0 = add i32 %a, %b + call void @llvm.dbg.value(metadata !0) + %1 = add i32 %a, %b + ret i32 0 + } + + declare void @llvm.dbg.value(metadata) + !0 = distinct !{!"test\00", i32 10})"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(3)); +} + +// The following are all exception handling intrinsics. We do not currently +// handle these instruction because they are very context dependent. + +// Checks that an eh.typeid.for intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) { + StringRef ModuleString = R"( + @_ZTIi = external constant i8* + define i32 @f() { + then: + %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + ret i32 0 + } + + declare i32 @llvm.eh.typeid.for(i8*))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.exceptioncode intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + %0 = catchswitch within none [label %__except] unwind to caller + + __except: + %1 = catchpad within %0 [i8* null] + catchret from %1 to label %__except + + then: + %2 = call i32 @llvm.eh.exceptioncode(token %1) + ret i32 0 + } + + declare i32 @llvm.eh.exceptioncode(token))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.unwind intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + call void @llvm.eh.unwind.init() + ret i32 0 + } + + declare void @llvm.eh.unwind.init())"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that an eh.exceptionpointer intrinsic is mapped to be illegal. +TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + entry: + %0 = call i8* @llvm.eh.exceptionpointer.p0i8(i32 0) + ret i32 0 + } + + declare i8* @llvm.eh.exceptionpointer.p0i8(i32))"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a catchpad instruction is mapped to an illegal value. +TEST(IRInstructionMapper, CatchpadIllegal) { + StringRef ModuleString = R"( + declare void @llvm.donothing() nounwind readnone + + define void @function() personality i8 3 { + entry: + invoke void @llvm.donothing() to label %normal unwind label %exception + exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + catchpad1: + catchpad within %cs1 [] + br label %normal + normal: + ret void + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// Checks that a cleanuppad instruction is mapped to an illegal value. +TEST(IRInstructionMapper, CleanuppadIllegal) { + StringRef ModuleString = R"( + declare void @llvm.donothing() nounwind readnone + + define void @function() personality i8 3 { + entry: + invoke void @llvm.donothing() to label %normal unwind label %exception + exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + catchpad1: + %clean = cleanuppad within none [] + br label %normal + normal: + ret void + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(0)); +} + +// The following three instructions are memory transfer and setting based, which +// are considered illegal since is extra checking needed to handle the address +// space checking. + +// Checks that a memset instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemSetIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a memcpy instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemCpyIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a memmove instruction is mapped to an illegal value. +TEST(IRInstructionMapper, MemMoveIllegal) { + StringRef ModuleString = R"( + declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(6)); + ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]); +} + +// Checks that a variable argument instructions are mapped to an illegal value. +// We exclude variable argument instructions since variable arguments +// requires extra checking of the argument list. +TEST(IRInstructionMapper, VarArgsIllegal) { + StringRef ModuleString = R"( + declare void @llvm.va_start(i8*) + declare void @llvm.va_copy(i8*, i8*) + declare void @llvm.va_end(i8*) + + define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { + entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + %0 = va_arg i8** %ap, i32 + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + call void @llvm.va_copy(i8* %v, i8* %ap1) + store double %b, double* %b.addr, align 8 + store double %b, double* %b.addr, align 8 + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(16)); + ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]); + ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]); + ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]); + ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]); +} + +// Check the length of adding two illegal instructions one after th other. We +// should find that only one element is added for each illegal range. +TEST(IRInstructionMapper, RepeatedIllegalLength) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = mul i32 %a, %b + %2 = call i32 @f(i32 %a, i32 %b) + %3 = call i32 @f(i32 %a, i32 %b) + %4 = add i32 %a, %b + %5 = mul i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + getVectors(*M, InstrList, UnsignedVec); + + // Check that the size of the unsigned vector and the instruction list are the + // same as a safety check. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + // Make sure that the unsigned vector is the expected size. + ASSERT_TRUE(UnsignedVec.size() == 6); +}