'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca',
'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump',
'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf',
- 'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings',
+ 'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings',
'llvm-strip', 'llvm-tblgen', 'llvm-undname', 'llvm-c-test', 'llvm-cxxfilt',
'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder',
'bugpoint', 'llc', 'llvm-symbolizer', 'opt', 'sancov', 'sanstats'])
--- /dev/null
+; Fixture function: reserves three i32 stack slots, stores the constants 2, 3
+; and 4 into them, then loads each slot back. The loaded values are never used
+; and the function returns void.
+; NOTE(review): the body is identical to @similar_func2; presumably the pair
+; exists so the similarity tool has a matching region to report -- confirm
+; against the RUN/CHECK lines, which are not visible here.
+define void @similar_func1() {
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
+
+; Fixture function with the same instruction sequence as @similar_func1: three
+; i32 allocas, stores of 2, 3 and 4, and three loads whose results are unused.
+; NOTE(review): presumably the second half of the pair the similarity tool is
+; expected to match -- confirm against the RUN/CHECK lines, which are not
+; visible here.
+define void @similar_func2() {
+entry:
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ store i32 3, i32* %b, align 4
+ store i32 4, i32* %c, align 4
+ %al = load i32, i32* %a
+ %bl = load i32, i32* %b
+ %cl = load i32, i32* %c
+ ret void
+}
--- /dev/null
+//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program finds similar sections of a Module, and exports them as a JSON
+// file.
+//
+// To find similarities contained across multiple modules, please use llvm-link
+// first to merge the modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+/// Destination of the JSON report, selected with -o. Initial value is "-".
+static cl::opt<std::string> OutputFilename("o", cl::init("-"),
+                                           cl::value_desc("filename"),
+                                           cl::desc("Output Filename"));
+
+/// Positional argument naming the IR file to analyze. Initial value is "-".
+static cl::opt<std::string> InputSourceFile(cl::Positional, cl::init("-"),
+                                            cl::value_desc("filename"),
+                                            cl::desc("<Source file>"));
+
+/// Look up the sequence number previously assigned to \p I.
+///
+/// The numbering is supplied by the caller through \p LLVMInstNum; this helper
+/// only performs the lookup and never inserts into the map.
+///
+/// \param I - The Instruction to find the instruction number for. Must not be
+/// null.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns The instruction number for \p I, or None when \p I has no entry
+/// in \p LLVMInstNum.
+static Optional<unsigned>
+getPositionInModule(const Instruction *I,
+                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  assert(I && "Instruction is nullptr!");
+  auto It = LLVMInstNum.find(I);
+  if (It == LLVMInstNum.end())
+    return None;
+  return It->second;
+}
+
+/// Write the given similarity groups to \p FilePath as a single JSON object.
+///
+/// Each group becomes one attribute whose key is the group's 1-based index
+/// (as a decimal string) and whose value is an array holding one
+/// {"start": N, "end": M} object per candidate. N and M are the numbers that
+/// \p LLVMInstNum records for the candidate's first and last instruction.
+///
+/// \param FilePath - The path to the output location.
+/// \param SimSections - The similarity groups to process.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns A nonzero error code if there was a failure creating the file.
+static std::error_code
+exportToFile(const StringRef FilePath, const SimilarityGroupList &SimSections,
+             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  std::error_code EC;
+  // The output file is only needed for the duration of this call, so it lives
+  // on the stack rather than behind a heap-allocated pointer.
+  ToolOutputFile Out(FilePath, EC, sys::fs::OF_None);
+  if (EC)
+    return EC;
+
+  json::OStream J(Out.os(), 1);
+  J.objectBegin();
+
+  unsigned SimOption = 1;
+  // Emit one attribute per similarity group, keyed by its running index.
+  for (const SimilarityGroup &G : SimSections) {
+    std::string SimOptionStr = std::to_string(SimOption);
+    J.attributeBegin(SimOptionStr);
+    J.arrayBegin();
+    // Each candidate in the group contributes the instruction-number range
+    // it covers.
+    for (const IRSimilarityCandidate &C : G) {
+      Optional<unsigned> Start =
+          getPositionInModule(C.front()->Inst, LLVMInstNum);
+      Optional<unsigned> End =
+          getPositionInModule(C.back()->Inst, LLVMInstNum);
+
+      assert(Start.hasValue() &&
+             "Could not find instruction number for first instruction");
+      assert(End.hasValue() &&
+             "Could not find instruction number for last instruction");
+
+      J.object([&] {
+        J.attribute("start", *Start);
+        J.attribute("end", *End);
+      });
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+    ++SimOption;
+  }
+  J.objectEnd();
+
+  // Everything was emitted; tell the ToolOutputFile to keep the result.
+  Out.keep();
+
+  return EC;
+}
+
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
+
+  // Parse the input IR. On failure, print the parser diagnostic and exit
+  // with status 1.
+  LLVMContext CurrContext;
+  SMDiagnostic ParseDiag;
+  std::unique_ptr<Module> M =
+      parseIRFile(InputSourceFile, ParseDiag, CurrContext);
+  if (M == nullptr) {
+    ParseDiag.print(argv[0], errs());
+    return 1;
+  }
+
+  // Assign every instruction yielded by instructionsWithoutDebug() a
+  // sequential number, starting at 1, in module iteration order. These
+  // numbers become the "start"/"end" values of each exported candidate.
+  DenseMap<Instruction *, unsigned> LLVMInstNum;
+  unsigned LastAssigned = 0;
+  for (Function &F : *M)
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB.instructionsWithoutDebug())
+        LLVMInstNum[&I] = ++LastAssigned;
+
+  // Run the similarity analysis over the whole module.
+  IRSimilarityIdentifier SimIdent;
+  SimilarityGroupList SimilaritySections = SimIdent.findSimilarity(*M);
+
+  // Write the report; a failure here is reported and yields exit status 2.
+  if (std::error_code E =
+          exportToFile(OutputFilename, SimilaritySections, LLVMInstNum)) {
+    errs() << argv[0] << ": " << E.message() << '\n';
+    return 2;
+  }
+
+  return 0;
+}