From 5155dff2784a47583d432d796b7cf47a0bed9f20 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Thu, 17 Sep 2020 15:43:40 -0500 Subject: [PATCH] [IRSim] Adding basic implementation of llvm-sim. This is a similarity visualization tool that accepts a Module and passes it to the IRSimilarityIdentifier. The resulting SimilarityGroups are output in a JSON file. Tests are found in test/tools/llvm-sim and check for the file not found, a bad module, and that the JSON is created correctly. Reviewers: paquette, jroelofs, MaskRay Recommit of: 15645d044bcfe2a0f63156048b302f997a717688 to fix linking errors. Differential Revision: https://reviews.llvm.org/D86974 --- llvm/test/CMakeLists.txt | 1 + llvm/test/lit.cfg.py | 2 +- llvm/test/tools/llvm-sim/Inputs/sim1.ll | 27 +++++ llvm/test/tools/llvm-sim/fail-cases.test | 8 ++ llvm/test/tools/llvm-sim/single-sim-file.test | 57 ++++++++++ llvm/test/tools/llvm-sim/single-sim.test | 56 ++++++++++ llvm/tools/llvm-sim/CMakeLists.txt | 9 ++ llvm/tools/llvm-sim/llvm-sim.cpp | 149 ++++++++++++++++++++++++++ 8 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-sim/Inputs/sim1.ll create mode 100644 llvm/test/tools/llvm-sim/fail-cases.test create mode 100644 llvm/test/tools/llvm-sim/single-sim-file.test create mode 100644 llvm/test/tools/llvm-sim/single-sim.test create mode 100644 llvm/tools/llvm-sim/CMakeLists.txt create mode 100644 llvm/tools/llvm-sim/llvm-sim.cpp diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 7c4fa2e..0c72adc 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -109,6 +109,7 @@ set(LLVM_TEST_DEPENDS llvm-readelf llvm-reduce llvm-rtdyld + llvm-sim llvm-size llvm-split llvm-strings diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 2a1ccc2..244d69e 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -162,7 +162,7 @@ tools.extend([ 'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca', 'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump', 'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf', - 'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings', + 'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings', 'llvm-strip', 'llvm-tblgen', 'llvm-undname', 'llvm-c-test', 'llvm-cxxfilt', 'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder', 'bugpoint', 'llc', 'llvm-symbolizer', 'opt', 'sancov', 'sanstats']) diff --git a/llvm/test/tools/llvm-sim/Inputs/sim1.ll b/llvm/test/tools/llvm-sim/Inputs/sim1.ll new file mode 100644 index 0000000..facc27d --- /dev/null +++ b/llvm/test/tools/llvm-sim/Inputs/sim1.ll @@ -0,0 +1,27 @@ +define void @similar_func1() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @similar_func2() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} diff --git a/llvm/test/tools/llvm-sim/fail-cases.test b/llvm/test/tools/llvm-sim/fail-cases.test new file mode 100644 index 0000000..41e3a56 --- /dev/null +++ b/llvm/test/tools/llvm-sim/fail-cases.test @@ -0,0 +1,8 @@ +# RUN: not llvm-sim %s 2>&1 | FileCheck %s +# RUN: not llvm-sim %s.2 2>&1 | FileCheck %s --check-prefix=EXIST + +# File reading error messaging tests. + +# CHECK: error: expected top-level entity + +# EXIST: error: Could not open input file: No such file or directory diff --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test new file mode 100644 index 0000000..5e45edf --- /dev/null +++ b/llvm/test/tools/llvm-sim/single-sim-file.test @@ -0,0 +1,57 @@ +# RUN: llvm-sim -o %t %S/Inputs/sim1.ll +# RUN: FileCheck %s < %t + +# Checking the output of a single module test. + +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } diff --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test new file mode 100644 index 0000000..4e04682 --- /dev/null +++ b/llvm/test/tools/llvm-sim/single-sim.test @@ -0,0 +1,56 @@ +# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s + +# Checking the output of a single module test. + +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } diff --git a/llvm/tools/llvm-sim/CMakeLists.txt b/llvm/tools/llvm-sim/CMakeLists.txt new file mode 100644 index 0000000..7629905 --- /dev/null +++ b/llvm/tools/llvm-sim/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + Analysis + IRReader) + +add_llvm_tool(llvm-sim + llvm-sim.cpp +) diff --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp new file mode 100644 index 0000000..26e370f --- /dev/null +++ b/llvm/tools/llvm-sim/llvm-sim.cpp @@ -0,0 +1,149 @@ +//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program finds similar sections of a Module, and exports them as a JSON +// file. +// +// To find similarities contained across multiple modules, please use llvm-link +// first to merge the modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" + +using namespace llvm; +using namespace IRSimilarity; + +static cl::opt OutputFilename("o", cl::desc("Output Filename"), + cl::init("-"), + cl::value_desc("filename")); + +static cl::opt InputSourceFile(cl::Positional, + cl::desc(""), + cl::init("-"), + cl::value_desc("filename")); + +/// Retrieve the unique number \p I was mapped to in parseBitcodeFile. +/// +/// \param I - The Instruction to find the instruction number for. +/// \param LLVMInstNum - The mapping of Instructions to their location in the +/// module represented by an unsigned integer. +/// \returns The instruction number for \p I if it exists. +Optional +getPositionInModule(const Instruction *I, + const DenseMap &LLVMInstNum) { + assert(I && "Instruction is nullptr!"); + DenseMap::const_iterator It = LLVMInstNum.find(I); + if (It == LLVMInstNum.end()) + return None; + return It->second; +} + +/// Exports the given SimilarityGroups to a JSON file at \p FilePath. +/// +/// \param FilePath - The path to the output location. +/// \param SimSections - The similarity groups to process. +/// \param LLVMInstNum - The mapping of Instructions to their location in the +/// module represented by an unsigned integer. +/// \returns A nonzero error code if there was a failure creating the file. +std::error_code +exportToFile(const StringRef FilePath, + const SimilarityGroupList &SimSections, + const DenseMap &LLVMInstNum) { + std::error_code EC; + std::unique_ptr Out( + new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); + if (EC) + return EC; + + json::OStream J(Out->os(), 1); + J.objectBegin(); + + unsigned SimOption = 1; + // Process each list of SimilarityGroups organized by the Module. + for (const SimilarityGroup &G : SimSections) { + std::string SimOptionStr = std::to_string(SimOption); + J.attributeBegin(SimOptionStr); + J.arrayBegin(); + // For each file there is a list of the range where the similarity + // exists. + for (const IRSimilarityCandidate &C : G) { + Optional Start = + getPositionInModule((*C.front()).Inst, LLVMInstNum); + Optional End = + getPositionInModule((*C.back()).Inst, LLVMInstNum); + + assert(Start.hasValue() && + "Could not find instruction number for first instruction"); + assert(End.hasValue() && + "Could not find instruction number for last instruction"); + + J.object([&] { + J.attribute("start", Start.getValue()); + J.attribute("end", End.getValue()); + }); + } + J.arrayEnd(); + J.attributeEnd(); + SimOption++; + } + J.objectEnd(); + + Out->keep(); + + return EC; +} + +int main(int argc, const char *argv[]) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); + + LLVMContext CurrContext; + SMDiagnostic Err; + std::unique_ptr ModuleToAnalyze = + parseIRFile(InputSourceFile, Err, CurrContext); + + if (!ModuleToAnalyze) { + Err.print(argv[0], errs()); + return 1; + } + + // Mapping from an Instruction pointer to its occurrence in a sequential + // list of all the Instructions in a Module. + DenseMap LLVMInstNum; + + // We give each instruction a number, which gives us a start and end value + // for the beginning and end of each IRSimilarityCandidate. + unsigned InstructionNumber = 1; + for (Function &F : *ModuleToAnalyze) + for (BasicBlock &BB : F) + for (Instruction &I : BB.instructionsWithoutDebug()) + LLVMInstNum[&I]= InstructionNumber++; + + // The similarity identifier we will use to find the similar sections. + IRSimilarityIdentifier SimIdent; + SimilarityGroupList SimilaritySections = + SimIdent.findSimilarity(*ModuleToAnalyze); + + std::error_code E = + exportToFile(OutputFilename, SimilaritySections, LLVMInstNum); + if (E) { + errs() << argv[0] << ": " << E.message() << '\n'; + return 2; + } + + return 0; +} -- 2.7.4