From: Mike Aizatsky Date: Wed, 28 Sep 2016 21:39:28 +0000 (+0000) Subject: [sancov] introducing symbolized coverage files (.symcov) X-Git-Tag: llvmorg-4.0.0-rc1~8696 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=392caa538dfa29757f8f1e8635508affab2b1072;p=platform%2Fupstream%2Fllvm.git [sancov] introducing symbolized coverage files (.symcov) Summary: Answering any meaningful questions about .sancov files requires accessing symbol information from the corresponding binary. This change introduces a separate intermediate data structure and format: symbolized coverage. It contains all symbol information that is required to answer common queries: - merging - coverd/uncovered files and functions - line status. Also removing the html report functionality from sancov: generated HTML files are too huge, and a different approach is required. Maintaining this half-working approach in the C++ is painful. Differential Revision: https://reviews.llvm.org/D24947 llvm-svn: 282639 --- diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index 7fe5eab..b8551e91 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -42,6 +42,10 @@ struct DILineInfo { bool operator!=(const DILineInfo &RHS) const { return !(*this == RHS); } + bool operator<(const DILineInfo &RHS) const { + return std::tie(FileName, FunctionName, Line, Column) < + std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column); + } }; typedef SmallVector, 16> DILineInfoTable; diff --git a/llvm/test/tools/sancov/Inputs/test-linux_x86_64.0.symcov b/llvm/test/tools/sancov/Inputs/test-linux_x86_64.0.symcov new file mode 100644 index 0000000..eca520f --- /dev/null +++ b/llvm/test/tools/sancov/Inputs/test-linux_x86_64.0.symcov @@ -0,0 +1,25 @@ +{ + "covered-points" : ["4e132b", "4e1472", "4e1520", "4e1553", "4e1586"], + "binary-hash" : "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5", + "point-symbol-info" : { + "test/tools/sancov/Inputs/foo.cpp" : { + "foo()" : { + "4e178c" : "5:0" + } + }, + "test/tools/sancov/Inputs/test.cpp" : { + "bar(std::string)" : { + "4e132b" : "12:0" + }, + "main" : { + "4e1472" : "14:0", + "4e14c2" : "16:9", + "4e1520" : "17:5", + "4e1553" : "17:5", + "4e1586" : "17:5", + "4e1635" : "19:1", + "4e1690" : "17:5" + } + } + } +} diff --git a/llvm/test/tools/sancov/Inputs/test-linux_x86_64.1.symcov b/llvm/test/tools/sancov/Inputs/test-linux_x86_64.1.symcov new file mode 100644 index 0000000..f8dc7c4 --- /dev/null +++ b/llvm/test/tools/sancov/Inputs/test-linux_x86_64.1.symcov @@ -0,0 +1,25 @@ +{ + "covered-points" : ["4e132b", "4e1472", "4e14c2", "4e1520", "4e1553", "4e1586", "4e178c"], + "binary-hash" : "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5", + "point-symbol-info" : { + "test/tools/sancov/Inputs/foo.cpp" : { + "foo()" : { + "4e178c" : "5:0" + } + }, + "test/tools/sancov/Inputs/test.cpp" : { + "bar(std::string)" : { + "4e132b" : "12:0" + }, + "main" : { + "4e1472" : "14:0", + "4e14c2" : "16:9", + "4e1520" : "17:5", + "4e1553" : "17:5", + "4e1586" : "17:5", + "4e1635" : "19:1", + "4e1690" : "17:5" + } + } + } +} diff --git a/llvm/test/tools/sancov/html-report.test b/llvm/test/tools/sancov/html-report.test deleted file mode 100644 index a50c8fb..0000000 --- a/llvm/test/tools/sancov/html-report.test +++ /dev/null @@ -1,6 +0,0 @@ -REQUIRES: x86_64-linux -RUN: sancov -html-report %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s - -// It's very difficult to test html report. Do basic smoke check. -CHECK: {{.*/Inputs/test.cpp}} - diff --git a/llvm/test/tools/sancov/merge.test b/llvm/test/tools/sancov/merge.test new file mode 100644 index 0000000..1e220ca --- /dev/null +++ b/llvm/test/tools/sancov/merge.test @@ -0,0 +1,64 @@ +REQUIRES: x86_64-linux +RUN: sancov -merge %p/Inputs/test-linux_x86_64.0.symcov| FileCheck --check-prefix=MERGE1 %s +RUN: sancov -merge %p/Inputs/test-linux_x86_64.0.symcov %p/Inputs/test-linux_x86_64.1.symcov| FileCheck --check-prefix=MERGE2 %s + +MERGE1: { +MERGE1-NEXT: "covered-points" : ["4e132b", "4e1472", "4e1520", "4e1553", "4e1586"], +MERGE1-NEXT: "binary-hash" : "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5", +MERGE1-NEXT: "point-symbol-info" : { +MERGE1-NEXT: "test/tools/sancov/Inputs/foo.cpp" : { +MERGE1-NEXT: "foo()" : { +MERGE1-NEXT: "4e178c" : "5:0" +MERGE1-NEXT: } +MERGE1-NEXT: }, +MERGE1-NEXT: "test/tools/sancov/Inputs/test.cpp" : { +MERGE1-NEXT: "bar(std::string)" : { +MERGE1-NEXT: "4e132b" : "12:0" +MERGE1-NEXT: }, +MERGE1-NEXT: "main" : { +MERGE1-NEXT: "4e1472" : "14:0", +MERGE1-NEXT: "4e14c2" : "16:9", +MERGE1-NEXT: "4e1520" : "17:5", +MERGE1-NEXT: "4e1553" : "17:5", +MERGE1-NEXT: "4e1586" : "17:5", +MERGE1-NEXT: "4e1635" : "19:1", +MERGE1-NEXT: "4e1690" : "17:5" +MERGE1-NEXT: } +MERGE1-NEXT: } +MERGE1-NEXT: } +MERGE1-NEXT: } + +MERGE2: { +MERGE2-NEXT: "covered-points" : ["BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e132b", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1472", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e14c2", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1520", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1553", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1586", "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e178c"], +MERGE2-NEXT: "point-symbol-info" : { +MERGE2-NEXT: "test/tools/sancov/Inputs/foo.cpp" : { +MERGE2-NEXT: "foo()" : { +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e178c" : "5:0", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e178c" : "5:0" +MERGE2-NEXT: } +MERGE2-NEXT: }, +MERGE2-NEXT: "test/tools/sancov/Inputs/test.cpp" : { +MERGE2-NEXT: "bar(std::string)" : { +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e132b" : "12:0", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e132b" : "12:0" +MERGE2-NEXT: }, +MERGE2-NEXT: "main" : { +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1472" : "14:0", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e14c2" : "16:9", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1520" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1553" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1586" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1635" : "19:1", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1690" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1472" : "14:0", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e14c2" : "16:9", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1520" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1553" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1586" : "17:5", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1635" : "19:1", +MERGE2-NEXT: "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5:4e1690" : "17:5" +MERGE2-NEXT: } +MERGE2-NEXT: } +MERGE2-NEXT: } +MERGE2-NEXT: } + diff --git a/llvm/test/tools/sancov/symbolize.test b/llvm/test/tools/sancov/symbolize.test new file mode 100644 index 0000000..bc6bfd2 --- /dev/null +++ b/llvm/test/tools/sancov/symbolize.test @@ -0,0 +1,29 @@ +REQUIRES: x86_64-linux +RUN: sancov -symbolize -strip_path_prefix="llvm/" %p/Inputs/test-linux_x86_64 %p/Inputs/test-linux_x86_64.0.sancov | FileCheck %s + +CHECK: { +CHECK-NEXT: "covered-points" : ["4e132b", "4e1472", "4e1520", "4e1553", "4e1586"], +CHECK-NEXT: "binary-hash" : "BB3CDD5045AED83906F6ADCC1C4DAF7E2596A6B5", +CHECK-NEXT: "point-symbol-info" : { +CHECK-NEXT: "test/tools/sancov/Inputs/foo.cpp" : { +CHECK-NEXT: "foo()" : { +CHECK-NEXT: "4e178c" : "5:0" +CHECK-NEXT: } +CHECK-NEXT: }, +CHECK-NEXT: "test/tools/sancov/Inputs/test.cpp" : { +CHECK-NEXT: "bar(std::string)" : { +CHECK-NEXT: "4e132b" : "12:0" +CHECK-NEXT: }, +CHECK-NEXT: "main" : { +CHECK-NEXT: "4e1472" : "14:0", +CHECK-NEXT: "4e14c2" : "16:9", +CHECK-NEXT: "4e1520" : "17:5", +CHECK-NEXT: "4e1553" : "17:5", +CHECK-NEXT: "4e1586" : "17:5", +CHECK-NEXT: "4e1635" : "19:1", +CHECK-NEXT: "4e1690" : "17:5" +CHECK-NEXT: } +CHECK-NEXT: } +CHECK-NEXT: } +CHECK-NEXT:} + diff --git a/llvm/tools/sancov/sancov.cc b/llvm/tools/sancov/sancov.cc index 8d1aa27..44e0f9f 100644 --- a/llvm/tools/sancov/sancov.cc +++ b/llvm/tools/sancov/sancov.cc @@ -11,6 +11,7 @@ // coverage. //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" @@ -41,11 +42,14 @@ #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/SpecialCaseList.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include @@ -62,28 +66,33 @@ namespace { // --------- COMMAND LINE FLAGS --------- enum ActionType { - PrintAction, - PrintCovPointsAction, CoveredFunctionsAction, - NotCoveredFunctionsAction, HtmlReportAction, - StatsAction + MergeAction, + NotCoveredFunctionsAction, + PrintAction, + PrintCovPointsAction, + StatsAction, + SymbolizeAction }; cl::opt Action( cl::desc("Action (required)"), cl::Required, - cl::values(clEnumValN(PrintAction, "print", "Print coverage addresses"), - clEnumValN(PrintCovPointsAction, "print-coverage-pcs", - "Print coverage instrumentation points addresses."), - clEnumValN(CoveredFunctionsAction, "covered-functions", - "Print all covered funcions."), - clEnumValN(NotCoveredFunctionsAction, "not-covered-functions", - "Print all not covered funcions."), - clEnumValN(HtmlReportAction, "html-report", - "Print HTML coverage report."), - clEnumValN(StatsAction, "print-coverage-stats", - "Print coverage statistics."), - clEnumValEnd)); + cl::values( + clEnumValN(PrintAction, "print", "Print coverage addresses"), + clEnumValN(PrintCovPointsAction, "print-coverage-pcs", + "Print coverage instrumentation points addresses."), + clEnumValN(CoveredFunctionsAction, "covered-functions", + "Print all covered funcions."), + clEnumValN(NotCoveredFunctionsAction, "not-covered-functions", + "Print all not covered funcions."), + clEnumValN(StatsAction, "print-coverage-stats", + "Print coverage statistics."), + clEnumValN(HtmlReportAction, "html-report", + "REMOVED. Use -symbolize & symcov-report-server.py."), + clEnumValN(SymbolizeAction, "symbolize", + "Produces a symbolized JSON report from binary report."), + clEnumValN(MergeAction, "merge", "Merges reports."), clEnumValEnd)); static cl::list ClInputFiles(cl::Positional, cl::OneOrMore, @@ -119,66 +128,99 @@ static const uint32_t BinCoverageMagic = 0xC0BFFFFF; static const uint32_t Bitness32 = 0xFFFFFF32; static const uint32_t Bitness64 = 0xFFFFFF64; +static Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov"); +static Regex SymcovFileRegex(".*\\.symcov"); + +// --------- MAIN DATASTRUCTURES ---------- + +// Contents of .sancov file: list of coverage point addresses that were +// executed. +struct RawCoverage { + explicit RawCoverage(std::unique_ptr> Addrs) + : Addrs(std::move(Addrs)) {} + + // Read binary .sancov file. + static ErrorOr> + read(const std::string &FileName); + + std::unique_ptr> Addrs; +}; + +// Coverage point has an opaque Id and corresponds to multiple source locations. +struct CoveragePoint { + explicit CoveragePoint(const std::string &Id) : Id(Id) {} + + std::string Id; + SmallVector Locs; +}; + +// Symcov file content: set of covered Ids plus information about all available +// coverage points. +struct SymbolizedCoverage { + // Read json .symcov file. + static std::unique_ptr read(const std::string &InputFile); + + std::set CoveredIds; + std::string BinaryHash; + std::vector Points; +}; + +struct CoverageStats { + size_t AllPoints; + size_t CovPoints; + size_t AllFns; + size_t CovFns; +}; + // --------- ERROR HANDLING --------- -static void Fail(const llvm::Twine &E) { +static void fail(const llvm::Twine &E) { errs() << "Error: " << E << "\n"; exit(1); } -static void FailIfError(std::error_code Error) { +static void failIf(bool B, const llvm::Twine &E) { + if (B) + fail(E); +} + +static void failIfError(std::error_code Error) { if (!Error) return; errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n"; exit(1); } -template static void FailIfError(const ErrorOr &E) { - FailIfError(E.getError()); +template static void failIfError(const ErrorOr &E) { + failIfError(E.getError()); } -static void FailIfError(Error Err) { +static void failIfError(Error Err) { if (Err) { logAllUnhandledErrors(std::move(Err), errs(), "Error: "); exit(1); } } -template static void FailIfError(Expected &E) { - FailIfError(E.takeError()); +template static void failIfError(Expected &E) { + failIfError(E.takeError()); } -static void FailIfNotEmpty(const llvm::Twine &E) { +static void failIfNotEmpty(const llvm::Twine &E) { if (E.str().empty()) return; - Fail(E); + fail(E); } template -static void FailIfEmpty(const std::unique_ptr &Ptr, +static void failIfEmpty(const std::unique_ptr &Ptr, const std::string &Message) { if (Ptr.get()) return; - Fail(Message); -} - -// --------- - -// Produces std::map> grouping input -// elements by FuncTy result. -template -static inline auto group_by(const RangeTy &R, FuncTy F) - -> std::map::type, - std::vector::type>> { - std::map::type, - std::vector::type>> - Result; - for (const auto &E : R) { - Result[F(E)].push_back(E); - } - return Result; + fail(Message); } +// ----------- Coverage I/O ---------- template static void readInts(const char *Start, const char *End, std::set *Ints) { @@ -187,34 +229,321 @@ static void readInts(const char *Start, const char *End, std::copy(S, E, std::inserter(*Ints, Ints->end())); } -struct FileLoc { - bool operator<(const FileLoc &RHS) const { - return std::tie(FileName, Line) < std::tie(RHS.FileName, RHS.Line); +ErrorOr> +RawCoverage::read(const std::string &FileName) { + ErrorOr> BufOrErr = + MemoryBuffer::getFile(FileName); + if (!BufOrErr) + return BufOrErr.getError(); + std::unique_ptr Buf = std::move(BufOrErr.get()); + if (Buf->getBufferSize() < 8) { + errs() << "File too small (<8): " << Buf->getBufferSize() << '\n'; + return make_error_code(errc::illegal_byte_sequence); } + const FileHeader *Header = + reinterpret_cast(Buf->getBufferStart()); - std::string FileName; - uint32_t Line; -}; + if (Header->Magic != BinCoverageMagic) { + errs() << "Wrong magic: " << Header->Magic << '\n'; + return make_error_code(errc::illegal_byte_sequence); + } -struct FileFn { - bool operator<(const FileFn &RHS) const { - return std::tie(FileName, FunctionName) < - std::tie(RHS.FileName, RHS.FunctionName); + auto Addrs = llvm::make_unique>(); + + switch (Header->Bitness) { + case Bitness64: + readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), + Addrs.get()); + break; + case Bitness32: + readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), + Addrs.get()); + break; + default: + errs() << "Unsupported bitness: " << Header->Bitness << '\n'; + return make_error_code(errc::illegal_byte_sequence); } - std::string FileName; - std::string FunctionName; -}; + return std::unique_ptr(new RawCoverage(std::move(Addrs))); +} -struct FnLoc { - bool operator<(const FnLoc &RHS) const { - return std::tie(Loc, FunctionName) < std::tie(RHS.Loc, RHS.FunctionName); +// Print coverage addresses. +raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) { + for (auto Addr : *CoverageData.Addrs) { + OS << "0x"; + OS.write_hex(Addr); + OS << "\n"; } + return OS; +} - FileLoc Loc; - std::string FunctionName; +static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) { + OS << "all-edges: " << Stats.AllPoints << "\n"; + OS << "cov-edges: " << Stats.CovPoints << "\n"; + OS << "all-functions: " << Stats.AllFns << "\n"; + OS << "cov-functions: " << Stats.CovFns << "\n"; + return OS; +} + +// Helper for writing out JSON. Handles indents and commas using +// scope variables for objects and arrays. +class JSONWriter { +public: + JSONWriter(raw_ostream &Out) : OS(Out) {} + JSONWriter(const JSONWriter &) = delete; + ~JSONWriter() { OS << "\n"; } + + void operator<<(StringRef S) { printJSONStringLiteral(S, OS); } + + // Helper RAII class to output JSON objects. + class Object { + public: + Object(JSONWriter *W, raw_ostream &OS) : W(W), OS(OS) { + OS << "{"; + W->Indent++; + } + Object(const Object &) = delete; + ~Object() { + W->Indent--; + OS << "\n"; + W->indent(); + OS << "}"; + } + + void key(StringRef Key) { + Index++; + if (Index > 0) + OS << ","; + OS << "\n"; + W->indent(); + printJSONStringLiteral(Key, OS); + OS << " : "; + } + + private: + JSONWriter *W; + raw_ostream &OS; + int Index = -1; + }; + + std::unique_ptr object() { return make_unique(this, OS); } + + // Helper RAII class to output JSON arrays. + class Array { + public: + Array(raw_ostream &OS) : OS(OS) { OS << "["; } + Array(const Array &) = delete; + ~Array() { OS << "]"; } + void next() { + Index++; + if (Index > 0) + OS << ", "; + } + + private: + raw_ostream &OS; + int Index = -1; + }; + + std::unique_ptr array() { return make_unique(OS); } + +private: + void indent() { OS.indent(Indent * 2); } + + static void printJSONStringLiteral(StringRef S, raw_ostream &OS) { + if (S.find('"') == std::string::npos) { + OS << "\"" << S << "\""; + return; + } + OS << "\""; + for (char Ch : S.bytes()) { + if (Ch == '"') + OS << "\\"; + OS << Ch; + } + OS << "\""; + } + + raw_ostream &OS; + int Indent = 0; }; +// Output symbolized information for coverage points in JSON. +// Format: +// { +// '' : { +// '' : { +// ' : ':' &Points) { + // Group points by file. + auto ByFile(W.object()); + std::map> PointsByFile; + for (const auto &Point : Points) { + for (const DILineInfo &Loc : Point.Locs) { + PointsByFile[Loc.FileName].push_back(&Point); + } + } + + for (const auto &P : PointsByFile) { + std::string FileName = P.first; + ByFile->key(FileName); + + // Group points by function. + auto ByFn(W.object()); + std::map> PointsByFn; + for (auto PointPtr : P.second) { + for (const DILineInfo &Loc : PointPtr->Locs) { + PointsByFn[Loc.FunctionName].push_back(PointPtr); + } + } + + for (const auto &P : PointsByFn) { + std::string FunctionName = P.first; + ByFn->key(FunctionName); + + // Output : ":". + auto ById(W.object()); + for (const CoveragePoint *Point : P.second) { + for (const auto &Loc : Point->Locs) { + if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) + continue; + + ById->key(Point->Id); + W << (utostr(Loc.Line) + ":" + utostr(Loc.Column)); + } + } + } + } +} + +static void operator<<(JSONWriter &W, const SymbolizedCoverage &C) { + auto O(W.object()); + + { + O->key("covered-points"); + auto PointsArray(W.array()); + + for (const auto &P : C.CoveredIds) { + PointsArray->next(); + W << P; + } + } + + { + if (!C.BinaryHash.empty()) { + O->key("binary-hash"); + W << C.BinaryHash; + } + } + + { + O->key("point-symbol-info"); + W << C.Points; + } +} + +static std::string parseScalarString(yaml::Node *N) { + SmallString<64> StringStorage; + yaml::ScalarNode *S = dyn_cast(N); + failIf(!S, "expected string"); + return S->getValue(StringStorage); +} + +std::unique_ptr +SymbolizedCoverage::read(const std::string &InputFile) { + auto Coverage(make_unique()); + + std::map Points; + ErrorOr> BufOrErr = + MemoryBuffer::getFile(InputFile); + failIfError(BufOrErr); + + SourceMgr SM; + yaml::Stream S(**BufOrErr, SM); + + yaml::document_iterator DI = S.begin(); + failIf(DI == S.end(), "empty document: " + InputFile); + yaml::Node *Root = DI->getRoot(); + failIf(!Root, "expecting root node: " + InputFile); + yaml::MappingNode *Top = dyn_cast(Root); + failIf(!Top, "expecting mapping node: " + InputFile); + + for (auto &KVNode : *Top) { + auto Key = parseScalarString(KVNode.getKey()); + + if (Key == "covered-points") { + yaml::SequenceNode *Points = + dyn_cast(KVNode.getValue()); + failIf(!Points, "expected array: " + InputFile); + + for (auto I = Points->begin(), E = Points->end(); I != E; ++I) { + Coverage->CoveredIds.insert(parseScalarString(&*I)); + } + } else if (Key == "binary-hash") { + Coverage->BinaryHash = parseScalarString(KVNode.getValue()); + } else if (Key == "point-symbol-info") { + yaml::MappingNode *PointSymbolInfo = + dyn_cast(KVNode.getValue()); + failIf(!PointSymbolInfo, "expected mapping node: " + InputFile); + + for (auto &FileKVNode : *PointSymbolInfo) { + auto Filename = parseScalarString(FileKVNode.getKey()); + + yaml::MappingNode *FileInfo = + dyn_cast(FileKVNode.getValue()); + failIf(!FileInfo, "expected mapping node: " + InputFile); + + for (auto &FunctionKVNode : *FileInfo) { + auto FunctionName = parseScalarString(FunctionKVNode.getKey()); + + yaml::MappingNode *FunctionInfo = + dyn_cast(FunctionKVNode.getValue()); + failIf(!FunctionInfo, "expected mapping node: " + InputFile); + + for (auto &PointKVNode : *FunctionInfo) { + auto PointId = parseScalarString(PointKVNode.getKey()); + auto Loc = parseScalarString(PointKVNode.getValue()); + + size_t ColonPos = Loc.find(':'); + failIf(ColonPos == std::string::npos, "expected ':': " + InputFile); + + auto LineStr = Loc.substr(0, ColonPos); + auto ColStr = Loc.substr(ColonPos + 1, Loc.size()); + + if (Points.find(PointId) == Points.end()) + Points.insert(std::make_pair(PointId, CoveragePoint(PointId))); + + DILineInfo LineInfo; + LineInfo.FileName = Filename; + LineInfo.FunctionName = FunctionName; + char *End; + LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10); + LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10); + + CoveragePoint *CoveragePoint = &Points.find(PointId)->second; + CoveragePoint->Locs.push_back(LineInfo); + } + } + } + } else { + errs() << "Ignoring unknown key: " << Key << "\n"; + } + } + + for (auto &KV : Points) { + Coverage->Points.push_back(KV.second); + } + + return Coverage; +} + +// ---------- MAIN FUNCTIONALITY ---------- + std::string stripPathPrefix(std::string Path) { if (ClStripPathPrefix.empty()) return Path; @@ -232,21 +561,11 @@ static std::unique_ptr createSymbolizer() { new symbolize::LLVMSymbolizer(SymbolizerOptions)); } -// A DILineInfo with address. -struct AddrInfo : public DILineInfo { - uint64_t Addr; - - AddrInfo(const DILineInfo &DI, uint64_t Addr) : DILineInfo(DI), Addr(Addr) { - FileName = normalizeFilename(FileName); - } - -private: - static std::string normalizeFilename(const std::string &FileName) { - SmallString<256> S(FileName); - sys::path::remove_dots(S, /* remove_dot_dot */ true); - return S.str().str(); - } -}; +static std::string normalizeFilename(const std::string &FileName) { + SmallString<256> S(FileName); + sys::path::remove_dots(S, /* remove_dot_dot */ true); + return stripPathPrefix(S.str().str()); +} class Blacklists { public: @@ -254,16 +573,14 @@ public: : DefaultBlacklist(createDefaultBlacklist()), UserBlacklist(createUserBlacklist()) {} - // AddrInfo contains normalized filename. It is important to check it rather - // than DILineInfo. - bool isBlacklisted(const AddrInfo &AI) { - if (DefaultBlacklist && DefaultBlacklist->inSection("fun", AI.FunctionName)) + bool isBlacklisted(const DILineInfo &I) { + if (DefaultBlacklist && DefaultBlacklist->inSection("fun", I.FunctionName)) return true; - if (DefaultBlacklist && DefaultBlacklist->inSection("src", AI.FileName)) + if (DefaultBlacklist && DefaultBlacklist->inSection("src", I.FileName)) return true; - if (UserBlacklist && UserBlacklist->inSection("fun", AI.FunctionName)) + if (UserBlacklist && UserBlacklist->inSection("fun", I.FunctionName)) return true; - if (UserBlacklist && UserBlacklist->inSection("src", AI.FileName)) + if (UserBlacklist && UserBlacklist->inSection("src", I.FileName)) return true; return false; } @@ -276,7 +593,7 @@ private: MemoryBuffer::getMemBuffer(DefaultBlacklistStr); std::string Error; auto Blacklist = SpecialCaseList::create(MB.get(), Error); - FailIfNotEmpty(Error); + failIfNotEmpty(Error); return Blacklist; } @@ -290,32 +607,43 @@ private: std::unique_ptr UserBlacklist; }; -// Collect all debug info for given addresses. -static std::vector getAddrInfo(const std::string &ObjectFile, - const std::set &Addrs, - bool InlinedCode) { - std::vector Result; +static std::vector +getCoveragePoints(const std::string &ObjectFile, + const std::set &Addrs, bool InlinedCode) { + std::vector Result; auto Symbolizer(createSymbolizer()); Blacklists B; for (auto Addr : Addrs) { + std::set Infos; // deduplicate debug info. + auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr); - FailIfError(LineInfo); - auto LineAddrInfo = AddrInfo(*LineInfo, Addr); - if (B.isBlacklisted(LineAddrInfo)) + failIfError(LineInfo); + LineInfo->FileName = normalizeFilename(LineInfo->FileName); + if (B.isBlacklisted(*LineInfo)) continue; - Result.push_back(LineAddrInfo); + + auto Id = utohexstr(Addr, true); + auto Point = CoveragePoint(Id); + Infos.insert(*LineInfo); + Point.Locs.push_back(*LineInfo); + if (InlinedCode) { auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr); - FailIfError(InliningInfo); + failIfError(InliningInfo); for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) { auto FrameInfo = InliningInfo->getFrame(I); - auto FrameAddrInfo = AddrInfo(FrameInfo, Addr); - if (B.isBlacklisted(FrameAddrInfo)) + FrameInfo.FileName = normalizeFilename(FrameInfo.FileName); + if (B.isBlacklisted(FrameInfo)) continue; - Result.push_back(FrameAddrInfo); + if (Infos.find(FrameInfo) == Infos.end()) { + Infos.insert(FrameInfo); + Point.Locs.push_back(FrameInfo); + } } } + + Result.push_back(Point); } return Result; @@ -353,7 +681,7 @@ static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O, if (IndirectSymbol < Symtab.nsyms) { object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol)); Expected Name = Symbol.getName(); - FailIfError(Name); + failIfError(Name); if (isCoveragePointSymbol(Name.get())) { Result->insert(Addr); } @@ -376,11 +704,11 @@ findSanitizerCovFunctions(const object::ObjectFile &O) { for (const object::SymbolRef &Symbol : O.symbols()) { Expected AddressOrErr = Symbol.getAddress(); - FailIfError(AddressOrErr); + failIfError(AddressOrErr); uint64_t Address = AddressOrErr.get(); Expected NameOrErr = Symbol.getName(); - FailIfError(NameOrErr); + failIfError(NameOrErr); StringRef Name = NameOrErr.get(); if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined) && @@ -394,11 +722,11 @@ findSanitizerCovFunctions(const object::ObjectFile &O) { CO->export_directories()) { uint32_t RVA; std::error_code EC = Export.getExportRVA(RVA); - FailIfError(EC); + failIfError(EC); StringRef Name; EC = Export.getSymbolName(Name); - FailIfError(EC); + failIfError(EC); if (isCoveragePointSymbol(Name)) Result.insert(CO->getImageBase() + RVA); @@ -423,36 +751,36 @@ static void getObjectCoveragePoints(const object::ObjectFile &O, std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); - FailIfNotEmpty(Error); + failIfNotEmpty(Error); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, "", "")); - FailIfEmpty(STI, "no subtarget info for target " + TripleName); + failIfEmpty(STI, "no subtarget info for target " + TripleName); std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); - FailIfEmpty(MRI, "no register info for target " + TripleName); + failIfEmpty(MRI, "no register info for target " + TripleName); std::unique_ptr AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName)); - FailIfEmpty(AsmInfo, "no asm info for target " + TripleName); + failIfEmpty(AsmInfo, "no asm info for target " + TripleName); std::unique_ptr MOFI(new MCObjectFileInfo); MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); std::unique_ptr DisAsm( TheTarget->createMCDisassembler(*STI, Ctx)); - FailIfEmpty(DisAsm, "no disassembler info for target " + TripleName); + failIfEmpty(DisAsm, "no disassembler info for target " + TripleName); std::unique_ptr MII(TheTarget->createMCInstrInfo()); - FailIfEmpty(MII, "no instruction info for target " + TripleName); + failIfEmpty(MII, "no instruction info for target " + TripleName); std::unique_ptr MIA( TheTarget->createMCInstrAnalysis(MII.get())); - FailIfEmpty(MIA, "no instruction analysis info for target " + TripleName); + failIfEmpty(MIA, "no instruction analysis info for target " + TripleName); auto SanCovAddrs = findSanitizerCovFunctions(O); if (SanCovAddrs.empty()) - Fail("__sanitizer_cov* functions not found"); + fail("__sanitizer_cov* functions not found"); for (object::SectionRef Section : O.sections()) { if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same. @@ -463,7 +791,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O, continue; StringRef BytesStr; - FailIfError(Section.getContents(BytesStr)); + failIfError(Section.getContents(BytesStr)); ArrayRef Bytes(reinterpret_cast(BytesStr.data()), BytesStr.size()); @@ -494,13 +822,13 @@ visitObjectFiles(const object::Archive &A, Error Err; for (auto &C : A.children(Err)) { Expected> ChildOrErr = C.getAsBinary(); - FailIfError(ChildOrErr); + failIfError(ChildOrErr); if (auto *O = dyn_cast(&*ChildOrErr.get())) Fn(*O); else - FailIfError(object::object_error::invalid_file_type); + failIfError(object::object_error::invalid_file_type); } - FailIfError(std::move(Err)); + failIfError(std::move(Err)); } static void @@ -509,7 +837,7 @@ visitObjectFiles(const std::string &FileName, Expected> BinaryOrErr = object::createBinary(FileName); if (!BinaryOrErr) - FailIfError(BinaryOrErr); + failIfError(BinaryOrErr); object::Binary &Binary = *BinaryOrErr.get().getBinary(); if (object::Archive *A = dyn_cast(&Binary)) @@ -517,10 +845,11 @@ visitObjectFiles(const std::string &FileName, else if (object::ObjectFile *O = dyn_cast(&Binary)) Fn(*O); else - FailIfError(object::object_error::invalid_file_type); + failIfError(object::object_error::invalid_file_type); } -std::set findSanitizerCovFunctions(const std::string &FileName) { +static std::set +findSanitizerCovFunctions(const std::string &FileName) { std::set Result; visitObjectFiles(FileName, [&](const object::ObjectFile &O) { auto Addrs = findSanitizerCovFunctions(O); @@ -532,7 +861,7 @@ std::set findSanitizerCovFunctions(const std::string &FileName) { // Locate addresses of all coverage points in a file. Coverage point // is defined as the 'address of instruction following __sanitizer_cov // call - 1'. -std::set getCoveragePoints(const std::string &FileName) { +static std::set findCoveragePointAddrs(const std::string &FileName) { std::set Result; visitObjectFiles(FileName, [&](const object::ObjectFile &O) { getObjectCoveragePoints(O, &Result); @@ -541,66 +870,18 @@ std::set getCoveragePoints(const std::string &FileName) { } static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) { - for (uint64_t Addr : getCoveragePoints(ObjFile)) { + for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) { OS << "0x"; OS.write_hex(Addr); OS << "\n"; } } -static std::string escapeHtml(const std::string &S) { - std::string Result; - Result.reserve(S.size()); - for (char Ch : S) { - switch (Ch) { - case '&': - Result.append("&"); - break; - case '\'': - Result.append("'"); - break; - case '"': - Result.append("""); - break; - case '<': - Result.append("<"); - break; - case '>': - Result.append(">"); - break; - default: - Result.push_back(Ch); - break; - } - } - return Result; -} - -// Adds leading zeroes wrapped in 'lz' style. -// Leading zeroes help locate 000% coverage. -static std::string formatHtmlPct(size_t Pct) { - Pct = std::max(std::size_t{0}, std::min(std::size_t{100}, Pct)); - - std::string Num = std::to_string(Pct); - std::string Zeroes(3 - Num.size(), '0'); - if (!Zeroes.empty()) - Zeroes = "" + Zeroes + ""; - - return Zeroes + Num; -} - -static std::string anchorName(const std::string &Anchor) { - llvm::MD5 Hasher; - llvm::MD5::MD5Result Hash; - Hasher.update(Anchor); - Hasher.final(Hash); - - SmallString<32> HexString; - llvm::MD5::stringifyResult(Hash, HexString); - return HexString.str().str(); -} - static ErrorOr isCoverageFile(const std::string &FileName) { + auto ShortFileName = llvm::sys::path::filename(FileName); + if (!SancovFileRegex.match(ShortFileName)) + return false; + ErrorOr> BufOrErr = MemoryBuffer::getFile(FileName); if (!BufOrErr) { @@ -618,490 +899,187 @@ static ErrorOr isCoverageFile(const std::string &FileName) { return Header->Magic == BinCoverageMagic; } -struct CoverageStats { - CoverageStats() : AllPoints(0), CovPoints(0), AllFns(0), CovFns(0) {} - - size_t AllPoints; - size_t CovPoints; - size_t AllFns; - size_t CovFns; -}; - -static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) { - OS << "all-edges: " << Stats.AllPoints << "\n"; - OS << "cov-edges: " << Stats.CovPoints << "\n"; - OS << "all-functions: " << Stats.AllFns << "\n"; - OS << "cov-functions: " << Stats.CovFns << "\n"; - return OS; +static bool isSymbolizedCoverageFile(const std::string &FileName) { + auto ShortFileName = llvm::sys::path::filename(FileName); + return SymcovFileRegex.match(ShortFileName); } -class CoverageData { -public: - // Read single file coverage data. - static ErrorOr> - read(const std::string &FileName) { - ErrorOr> BufOrErr = - MemoryBuffer::getFile(FileName); - if (!BufOrErr) - return BufOrErr.getError(); - std::unique_ptr Buf = std::move(BufOrErr.get()); - if (Buf->getBufferSize() < 8) { - errs() << "File too small (<8): " << Buf->getBufferSize() << '\n'; - return make_error_code(errc::illegal_byte_sequence); - } - const FileHeader *Header = - reinterpret_cast(Buf->getBufferStart()); - - if (Header->Magic != BinCoverageMagic) { - errs() << "Wrong magic: " << Header->Magic << '\n'; - return make_error_code(errc::illegal_byte_sequence); - } - - auto Addrs = llvm::make_unique>(); - - switch (Header->Bitness) { - case Bitness64: - readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), - Addrs.get()); - break; - case Bitness32: - readInts(Buf->getBufferStart() + 8, Buf->getBufferEnd(), - Addrs.get()); - break; - default: - errs() << "Unsupported bitness: " << Header->Bitness << '\n'; - return make_error_code(errc::illegal_byte_sequence); - } - - return std::unique_ptr(new CoverageData(std::move(Addrs))); - } - - // Merge multiple coverage data together. - static std::unique_ptr - merge(const std::vector> &Covs) { - auto Addrs = llvm::make_unique>(); - - for (const auto &Cov : Covs) - Addrs->insert(Cov->Addrs->begin(), Cov->Addrs->end()); +static std::unique_ptr +symbolize(const RawCoverage &Data, const std::string ObjectFile) { + auto Coverage = make_unique(); - return std::unique_ptr(new CoverageData(std::move(Addrs))); + ErrorOr> BufOrErr = + MemoryBuffer::getFile(ObjectFile); + failIfError(BufOrErr); + SHA1 Hasher; + Hasher.update((*BufOrErr)->getBuffer()); + Coverage->BinaryHash = toHex(Hasher.final()); + + for (uint64_t Addr : *Data.Addrs) { + Coverage->CoveredIds.insert(utohexstr(Addr, true)); } - // Read list of files and merges their coverage info. - static ErrorOr> - readAndMerge(const std::vector &FileNames) { - std::vector> Covs; - for (const auto &FileName : FileNames) { - auto Cov = read(FileName); - if (!Cov) - return Cov.getError(); - Covs.push_back(std::move(Cov.get())); - } - return merge(Covs); + std::set AllAddrs = findCoveragePointAddrs(ObjectFile); + if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(), + Data.Addrs->end())) { + fail("Coverage points in binary and .sancov file do not match."); } + Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, true); + return Coverage; +} - // Print coverage addresses. - void printAddrs(raw_ostream &OS) { - for (auto Addr : *Addrs) { - OS << "0x"; - OS.write_hex(Addr); - OS << "\n"; - } +struct FileFn { + bool operator<(const FileFn &RHS) const { + return std::tie(FileName, FunctionName) < + std::tie(RHS.FileName, RHS.FunctionName); } -protected: - explicit CoverageData(std::unique_ptr> Addrs) - : Addrs(std::move(Addrs)) {} - - friend class CoverageDataWithObjectFile; - - std::unique_ptr> Addrs; + std::string FileName; + std::string FunctionName; }; -// Coverage data translated into source code line-level information. -// Fetches debug info in constructor and calculates various information per -// request. -class SourceCoverageData { -public: - enum LineStatus { - // coverage information for the line is not available. - // default value in maps. - UNKNOWN = 0, - // the line is fully covered. - COVERED = 1, - // the line is fully uncovered. - NOT_COVERED = 2, - // some points in the line a covered, some are not. - MIXED = 3 - }; - - SourceCoverageData(std::string ObjectFile, const std::set &Addrs) - : AllCovPoints(getCoveragePoints(ObjectFile)) { - if (!std::includes(AllCovPoints.begin(), AllCovPoints.end(), Addrs.begin(), - Addrs.end())) { - Fail("Coverage points in binary and .sancov file do not match."); - } - - AllAddrInfo = getAddrInfo(ObjectFile, AllCovPoints, true); - CovAddrInfo = getAddrInfo(ObjectFile, Addrs, true); - } - - // Compute number of coverage points hit/total in a file. - // file_name -> - std::map> computeFileCoverage() { - std::map> FileCoverage; - auto AllCovPointsByFile = - group_by(AllAddrInfo, [](const AddrInfo &AI) { return AI.FileName; }); - auto CovPointsByFile = - group_by(CovAddrInfo, [](const AddrInfo &AI) { return AI.FileName; }); - - for (const auto &P : AllCovPointsByFile) { - const std::string &FileName = P.first; - - FileCoverage[FileName] = - std::make_pair(CovPointsByFile[FileName].size(), - AllCovPointsByFile[FileName].size()); - } - return FileCoverage; - } - - // line_number -> line_status. - typedef std::map LineStatusMap; - // file_name -> LineStatusMap - typedef std::map FileLineStatusMap; - - // fills in the {file_name -> {line_no -> status}} map. - FileLineStatusMap computeLineStatusMap() { - FileLineStatusMap StatusMap; - - auto AllLocs = group_by(AllAddrInfo, [](const AddrInfo &AI) { - return FileLoc{AI.FileName, AI.Line}; - }); - auto CovLocs = group_by(CovAddrInfo, [](const AddrInfo &AI) { - return FileLoc{AI.FileName, AI.Line}; - }); - - for (const auto &P : AllLocs) { - const FileLoc &Loc = P.first; - auto I = CovLocs.find(Loc); - - if (I == CovLocs.end()) { - StatusMap[Loc.FileName][Loc.Line] = NOT_COVERED; - } else { - StatusMap[Loc.FileName][Loc.Line] = - (I->second.size() == P.second.size()) ? COVERED : MIXED; - } +static std::set +computeFunctions(const std::vector &Points) { + std::set Fns; + for (const auto &Point : Points) { + for (const auto &Loc : Point.Locs) { + Fns.insert(FileFn{Loc.FileName, Loc.FunctionName}); } - return StatusMap; } + return Fns; +} - std::set computeAllFunctions() const { - std::set Fns; - for (const auto &AI : AllAddrInfo) { - Fns.insert(FileFn{AI.FileName, AI.FunctionName}); - } - return Fns; - } +static std::set +computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) { + auto Fns = computeFunctions(Coverage.Points); - std::set computeCoveredFunctions() const { - std::set Fns; - auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) { - return FileFn{AI.FileName, AI.FunctionName}; - }); + for (const auto &Point : Coverage.Points) { + if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) + continue; - for (const auto &P : CovFns) { - Fns.insert(P.first); + for (const auto &Loc : Point.Locs) { + Fns.erase(FileFn{Loc.FileName, Loc.FunctionName}); } - return Fns; } - std::set computeNotCoveredFunctions() const { - std::set Fns; - - auto AllFns = group_by(AllAddrInfo, [](const AddrInfo &AI) { - return FileFn{AI.FileName, AI.FunctionName}; - }); - auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) { - return FileFn{AI.FileName, AI.FunctionName}; - }); - - for (const auto &P : AllFns) { - if (CovFns.find(P.first) == CovFns.end()) { - Fns.insert(P.first); - } - } - return Fns; - } + return Fns; +} - // Compute % coverage for each function. - std::map computeFunctionsCoverage() const { - std::map FnCoverage; - auto AllFns = group_by(AllAddrInfo, [](const AddrInfo &AI) { - return FileFn{AI.FileName, AI.FunctionName}; - }); +static std::set +computeCoveredFunctions(const SymbolizedCoverage &Coverage) { + auto AllFns = computeFunctions(Coverage.Points); + std::set Result; - auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) { - return FileFn{AI.FileName, AI.FunctionName}; - }); + for (const auto &Point : Coverage.Points) { + if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end()) + continue; - for (const auto &P : AllFns) { - FileFn F = P.first; - FnCoverage[F] = CovFns[F].size() * 100 / P.second.size(); + for (const auto &Loc : Point.Locs) { + Result.insert(FileFn{Loc.FileName, Loc.FunctionName}); } - - return FnCoverage; } - typedef std::map> FunctionLocs; - // finds first line number in a file for each function. - FunctionLocs resolveFunctions(const std::set &Fns) const { - std::vector FnAddrs; - for (const auto &AI : AllAddrInfo) { - if (Fns.find(FileFn{AI.FileName, AI.FunctionName}) != Fns.end()) - FnAddrs.push_back(AI); - } - - auto GroupedAddrs = group_by(FnAddrs, [](const AddrInfo &AI) { - return FnLoc{FileLoc{AI.FileName, AI.Line}, AI.FunctionName}; - }); - - FunctionLocs Result; - std::string LastFileName; - std::set ProcessedFunctions; - - for (const auto &P : GroupedAddrs) { - const FnLoc &Loc = P.first; - std::string FileName = Loc.Loc.FileName; - std::string FunctionName = Loc.FunctionName; - - if (LastFileName != FileName) - ProcessedFunctions.clear(); - LastFileName = FileName; + return Result; +} - if (!ProcessedFunctions.insert(FunctionName).second) +typedef std::map> FunctionLocs; +// finds first location in a file for each function. +static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage, + const std::set &Fns) { + FunctionLocs Result; + for (const auto &Point : Coverage.Points) { + for (const auto &Loc : Point.Locs) { + FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName}; + if (Fns.find(Fn) == Fns.end()) continue; - auto FLoc = FileLoc{FileName, Loc.Loc.Line}; - Result[FLoc].insert(FunctionName); - } - return Result; - } - - std::set files() const { - std::set Files; - for (const auto &AI : AllAddrInfo) { - Files.insert(AI.FileName); - } - return Files; - } - - void collectStats(CoverageStats *Stats) const { - Stats->AllPoints += AllCovPoints.size(); - Stats->AllFns += computeAllFunctions().size(); - Stats->CovFns += computeCoveredFunctions().size(); - } - -private: - const std::set AllCovPoints; - - std::vector AllAddrInfo; - std::vector CovAddrInfo; -}; - -static void printFunctionLocs(const SourceCoverageData::FunctionLocs &FnLocs, - raw_ostream &OS) { - for (const auto &Fns : FnLocs) { - for (const auto &Fn : Fns.second) { - OS << stripPathPrefix(Fns.first.FileName) << ":" << Fns.first.Line << " " - << Fn << "\n"; + auto P = std::make_pair(Loc.Line, Loc.Column); + auto I = Result.find(Fn); + if (I == Result.end() || I->second > P) { + Result[Fn] = P; + } } } + return Result; } -// Holder for coverage data + filename of corresponding object file. -class CoverageDataWithObjectFile : public CoverageData { -public: - static ErrorOr> - readAndMerge(const std::string &ObjectFile, - const std::vector &FileNames) { - auto MergedDataOrError = CoverageData::readAndMerge(FileNames); - if (!MergedDataOrError) - return MergedDataOrError.getError(); - return std::unique_ptr( - new CoverageDataWithObjectFile(ObjectFile, - std::move(MergedDataOrError.get()))); +static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) { + for (const auto &P : FnLocs) { + OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " " + << P.first.FunctionName << "\n"; } +} +CoverageStats computeStats(const SymbolizedCoverage &Coverage) { + CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(), + computeFunctions(Coverage.Points).size(), + computeCoveredFunctions(Coverage).size()}; + return Stats; +} - std::string object_file() const { return ObjectFile; } +// Print list of covered functions. +// Line format: : +static void printCoveredFunctions(const SymbolizedCoverage &CovData, + raw_ostream &OS) { + auto CoveredFns = computeCoveredFunctions(CovData); + printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS); +} - // Print list of covered functions. - // Line format: : - void printCoveredFunctions(raw_ostream &OS) const { - SourceCoverageData SCovData(ObjectFile, *Addrs); - auto CoveredFns = SCovData.computeCoveredFunctions(); - printFunctionLocs(SCovData.resolveFunctions(CoveredFns), OS); - } +// Print list of not covered functions. +// Line format: : +static void printNotCoveredFunctions(const SymbolizedCoverage &CovData, + raw_ostream &OS) { + auto NotCoveredFns = computeNotCoveredFunctions(CovData); + printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS); +} - // Print list of not covered functions. - // Line format: : - void printNotCoveredFunctions(raw_ostream &OS) const { - SourceCoverageData SCovData(ObjectFile, *Addrs); - auto NotCoveredFns = SCovData.computeNotCoveredFunctions(); - printFunctionLocs(SCovData.resolveFunctions(NotCoveredFns), OS); +// Read list of files and merges their coverage info. +static void readAndPrintRawCoverage(const std::vector &FileNames, + raw_ostream &OS) { + std::vector> Covs; + for (const auto &FileName : FileNames) { + auto Cov = RawCoverage::read(FileName); + if (!Cov) + continue; + OS << *Cov.get(); } +} - void printReport(raw_ostream &OS) const { - SourceCoverageData SCovData(ObjectFile, *Addrs); - auto LineStatusMap = SCovData.computeLineStatusMap(); - - std::set AllFns = SCovData.computeAllFunctions(); - // file_loc -> set[function_name] - auto AllFnsByLoc = SCovData.resolveFunctions(AllFns); - auto FileCoverage = SCovData.computeFileCoverage(); - - auto FnCoverage = SCovData.computeFunctionsCoverage(); - auto FnCoverageByFile = - group_by(FnCoverage, [](const std::pair &FileFn) { - return FileFn.first.FileName; - }); - - // TOC - - size_t NotCoveredFilesCount = 0; - std::set Files = SCovData.files(); - - // Covered Files. - OS << "
Touched Files\n"; - OS << "\n"; - OS << ""; - OS << "\n"; - for (const auto &FileName : Files) { - std::pair FC = FileCoverage[FileName]; - if (FC.first == 0) { - NotCoveredFilesCount++; - continue; - } - size_t CovPct = FC.second == 0 ? 100 : 100 * FC.first / FC.second; - - OS << "" - << "" - << "\n"; - } - OS << "
FileCoverage %Hit (Total) Fns
" - << stripPathPrefix(FileName) << "" << formatHtmlPct(CovPct) << "%" << FC.first << " (" << FC.second << ")" - << "
\n"; - OS << "
\n"; - - // Not covered files. - if (NotCoveredFilesCount) { - OS << "
Not Touched Files\n"; - OS << "\n"; - for (const auto &FileName : Files) { - std::pair FC = FileCoverage[FileName]; - if (FC.first == 0) - OS << "\n"; - } - OS << "
" << stripPathPrefix(FileName) << "
\n"; - OS << "
\n"; - } else { - OS << "

Congratulations! All source files are touched.

\n"; +static std::unique_ptr +merge(const std::vector> &Coverages) { + auto Result = make_unique(); + + for (size_t I = 0; I < Coverages.size(); ++I) { + const SymbolizedCoverage &Coverage = *Coverages[I]; + std::string Prefix; + if (Coverages.size() > 1) { + // prefix is not needed when there's only one file. + Prefix = + (Coverage.BinaryHash.size() ? Coverage.BinaryHash : utostr(I)) + ":"; } - // Source - for (const auto &FileName : Files) { - std::pair FC = FileCoverage[FileName]; - if (FC.first == 0) - continue; - OS << "\n"; - OS << "

" << stripPathPrefix(FileName) << "

\n"; - OS << "
Function Coverage"; - OS << "
\n"; - - auto &FileFnCoverage = FnCoverageByFile[FileName]; - - for (const auto &P : FileFnCoverage) { - std::string FunctionName = P.first.FunctionName; - - OS << "
"; - OS << "" << formatHtmlPct(P.second) - << "% "; - OS << ""; - OS << escapeHtml(FunctionName) << ""; - OS << "
\n"; - } - OS << "
\n"; - - ErrorOr> BufOrErr = - MemoryBuffer::getFile(FileName); - if (!BufOrErr) { - OS << "Error reading file: " << FileName << " : " - << BufOrErr.getError().message() << "(" - << BufOrErr.getError().value() << ")\n"; - continue; - } - - OS << "
\n";
-      const auto &LineStatuses = LineStatusMap[FileName];
-      for (line_iterator I = line_iterator(*BufOrErr.get(), false);
-           !I.is_at_eof(); ++I) {
-        uint32_t Line = I.line_number();
-        { // generate anchors (if any);
-          FileLoc Loc = FileLoc{FileName, Line};
-          auto It = AllFnsByLoc.find(Loc);
-          if (It != AllFnsByLoc.end()) {
-            for (const std::string &Fn : It->second) {
-              OS << "";
-            };
-          }
-        }
+    for (const auto &Id : Coverage.CoveredIds) {
+      Result->CoveredIds.insert(Prefix + Id);
+    }
 
-        OS << "second
-                                                  : SourceCoverageData::UNKNOWN;
-        switch (Status) {
-        case SourceCoverageData::UNKNOWN:
-          OS << "class=unknown";
-          break;
-        case SourceCoverageData::COVERED:
-          OS << "class=covered";
-          break;
-        case SourceCoverageData::NOT_COVERED:
-          OS << "class=notcovered";
-          break;
-        case SourceCoverageData::MIXED:
-          OS << "class=mixed";
-          break;
-        }
-        OS << ">";
-        OS << escapeHtml(*I) << "\n";
-      }
-      OS << "
\n"; + for (const auto &CovPoint : Coverage.Points) { + CoveragePoint NewPoint(CovPoint); + NewPoint.Id = Prefix + CovPoint.Id; + Result->Points.push_back(NewPoint); } } - void collectStats(CoverageStats *Stats) const { - Stats->CovPoints += Addrs->size(); - - SourceCoverageData SCovData(ObjectFile, *Addrs); - SCovData.collectStats(Stats); + if (Coverages.size() == 1) { + Result->BinaryHash = Coverages[0]->BinaryHash; } -private: - CoverageDataWithObjectFile(std::string ObjectFile, - std::unique_ptr Coverage) - : CoverageData(std::move(Coverage->Addrs)), - ObjectFile(std::move(ObjectFile)) {} - const std::string ObjectFile; -}; + return Result; +} -// Multiple coverage files data organized by object file. -class CoverageDataSet { -public: - static ErrorOr> - readCmdArguments(std::vector FileNames) { +static std::unique_ptr +readSymbolizeAndMergeCmdArguments(std::vector FileNames) { + std::vector> Coverages; + + { // Short name => file name. std::map ObjFiles; std::string FirstObjFile; @@ -1109,6 +1087,10 @@ public: // Partition input values into coverage/object files. for (const auto &FileName : FileNames) { + if (isSymbolizedCoverageFile(FileName)) { + Coverages.push_back(SymbolizedCoverage::read(FileName)); + } + auto ErrorOrIsCoverage = isCoverageFile(FileName); if (!ErrorOrIsCoverage) continue; @@ -1117,7 +1099,7 @@ public: } else { auto ShortFileName = llvm::sys::path::filename(FileName); if (ObjFiles.find(ShortFileName) != ObjFiles.end()) { - Fail("Duplicate binary file with a short name: " + ShortFileName); + fail("Duplicate binary file with a short name: " + ShortFileName); } ObjFiles[ShortFileName] = FileName; @@ -1126,28 +1108,28 @@ public: } } - Regex SancovRegex("(.*)\\.[0-9]+\\.sancov"); SmallVector Components; // Object file => list of corresponding coverage file names. - auto CoverageByObjFile = group_by(CovFiles, [&](std::string FileName) { + std::map> CoverageByObjFile; + for (const auto &FileName : CovFiles) { auto ShortFileName = llvm::sys::path::filename(FileName); - auto Ok = SancovRegex.match(ShortFileName, &Components); + auto Ok = SancovFileRegex.match(ShortFileName, &Components); if (!Ok) { - Fail("Can't match coverage file name against " + fail("Can't match coverage file name against " "..sancov pattern: " + FileName); } auto Iter = ObjFiles.find(Components[1]); if (Iter == ObjFiles.end()) { - Fail("Object file for coverage not found: " + FileName); + fail("Object file for coverage not found: " + FileName); } - return Iter->second; - }); - // Read coverage. - std::vector> MergedCoverage; + CoverageByObjFile[Iter->second].push_back(FileName); + }; + + // Read raw coverage and symbolize it. for (const auto &Pair : CoverageByObjFile) { if (findSanitizerCovFunctions(Pair.first).empty()) { for (const auto &FileName : Pair.second) { @@ -1161,132 +1143,34 @@ public: continue; } - auto DataOrError = - CoverageDataWithObjectFile::readAndMerge(Pair.first, Pair.second); - FailIfError(DataOrError); - MergedCoverage.push_back(std::move(DataOrError.get())); - } - - return std::unique_ptr( - new CoverageDataSet(FirstObjFile, &MergedCoverage, CovFiles)); - } - - void printCoveredFunctions(raw_ostream &OS) const { - for (const auto &Cov : Coverage) { - Cov->printCoveredFunctions(OS); - } - } - - void printNotCoveredFunctions(raw_ostream &OS) const { - for (const auto &Cov : Coverage) { - Cov->printNotCoveredFunctions(OS); - } - } - - void printStats(raw_ostream &OS) const { - CoverageStats Stats; - for (const auto &Cov : Coverage) { - Cov->collectStats(&Stats); - } - OS << Stats; - } - - void printReport(raw_ostream &OS) const { - auto Title = - (llvm::sys::path::filename(MainObjFile) + " Coverage Report").str(); - - OS << "\n"; - OS << "\n"; - - // Stylesheet - OS << "\n"; - OS << "" << Title << "\n"; - OS << "\n"; - OS << "\n"; - - // Title - OS << "

" << Title << "

\n"; - - // Modules TOC. - if (Coverage.size() > 1) { - for (const auto &CovData : Coverage) { - OS << "
  • object_file()) - << "\">" << llvm::sys::path::filename(CovData->object_file()) - << "
  • \n"; - } - } - - for (const auto &CovData : Coverage) { - if (Coverage.size() > 1) { - OS << "

    " << llvm::sys::path::filename(CovData->object_file()) - << "

    \n"; + for (const std::string &CoverageFile : Pair.second) { + auto DataOrError = RawCoverage::read(CoverageFile); + failIfError(DataOrError); + Coverages.push_back(symbolize(*DataOrError.get(), Pair.first)); } - OS << "object_file()) - << "\">\n"; - CovData->printReport(OS); } - - // About - OS << "
    About\n"; - OS << "Coverage files:
      "; - for (const auto &InputFile : CoverageFiles) { - llvm::sys::fs::file_status Status; - llvm::sys::fs::status(InputFile, Status); - OS << "
    • " << stripPathPrefix(InputFile) << " (" - << Status.getLastModificationTime().str() << ")
    • \n"; - } - OS << "
    \n"; - - OS << "\n"; - OS << "\n"; } - bool empty() const { return Coverage.empty(); } - -private: - explicit CoverageDataSet( - const std::string &MainObjFile, - std::vector> *Data, - const std::set &CoverageFiles) - : MainObjFile(MainObjFile), CoverageFiles(CoverageFiles) { - Data->swap(this->Coverage); - } - - const std::string MainObjFile; - std::vector> Coverage; - const std::set CoverageFiles; -}; + return merge(Coverages); +} } // namespace -int main(int argc, char **argv) { +int main(int Argc, char **Argv) { // Print stack trace if we signal out. - sys::PrintStackTraceOnErrorSignal(argv[0]); - PrettyStackTraceProgram X(argc, argv); + sys::PrintStackTraceOnErrorSignal(Argv[0]); + PrettyStackTraceProgram X(Argc, Argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetMCs(); llvm::InitializeAllDisassemblers(); - cl::ParseCommandLineOptions(argc, argv, "Sanitizer Coverage Processing Tool"); + cl::ParseCommandLineOptions(Argc, Argv, "Sanitizer Coverage Processing Tool"); // -print doesn't need object files. if (Action == PrintAction) { - auto CovData = CoverageData::readAndMerge(ClInputFiles); - FailIfError(CovData); - CovData.get()->printAddrs(outs()); + readAndPrintRawCoverage(ClInputFiles, outs()); return 0; } else if (Action == PrintCovPointsAction) { // -print-coverage-points doesn't need coverage files. @@ -1296,30 +1180,32 @@ int main(int argc, char **argv) { return 0; } - auto CovDataSet = CoverageDataSet::readCmdArguments(ClInputFiles); - FailIfError(CovDataSet); - - if (CovDataSet.get()->empty()) { - Fail("No coverage files specified."); - } + auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles); + failIf(!Coverage, "No valid coverage files given."); switch (Action) { case CoveredFunctionsAction: { - CovDataSet.get()->printCoveredFunctions(outs()); + printCoveredFunctions(*Coverage, outs()); return 0; } case NotCoveredFunctionsAction: { - CovDataSet.get()->printNotCoveredFunctions(outs()); + printNotCoveredFunctions(*Coverage, outs()); return 0; } - case HtmlReportAction: { - CovDataSet.get()->printReport(outs()); + case StatsAction: { + outs() << computeStats(*Coverage); return 0; } - case StatsAction: { - CovDataSet.get()->printStats(outs()); + case MergeAction: + case SymbolizeAction: { // merge & symbolize are synonims. + JSONWriter W(outs()); + W << *Coverage; return 0; } + case HtmlReportAction: + errs() << "-html-report option is removed: " + "use -symbolize & symcov-report-server.py instead\n"; + return 1; case PrintAction: case PrintCovPointsAction: llvm_unreachable("unsupported action");