From e13eff293db2fa12de11e8087ef62950d0cd8f83 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 8 May 2019 17:20:09 +0000 Subject: [PATCH] [libFuzzer] DFT: when dumping coverage, also dump the total number of instrumented blocks in a function; update merge_data_flow.py to merge coverage llvm-svn: 360272 --- compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp | 2 + compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp | 47 ++++++++++++----------- compiler-rt/lib/fuzzer/scripts/merge_data_flow.py | 17 ++++++++ compiler-rt/test/fuzzer/dataflow.test | 4 +- 4 files changed, 46 insertions(+), 24 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp index 5ae7510..604fe15 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -52,6 +52,8 @@ void DataFlowTrace::Init(const std::string &DirPath, // Printf("=== %s\n", Name.c_str()); std::ifstream IF(SF.File); while (std::getline(IF, L, '\n')) { + if (!L.empty() && L[0] == 'C') + continue; // Ignore coverage. size_t SpacePos = L.find(' '); if (SpacePos == std::string::npos) return ParseError("no space in the trace line"); diff --git a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp index d51c3f0..989675e 100644 --- a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp +++ b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp @@ -44,14 +44,15 @@ // =============== // F0 11111111111111 // F1 10000000000000 -// C0 1 2 3 4 -// C1 +// C0 1 2 3 4 5 +// C1 8 // =============== // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. // The byte string is LEN+1 bytes. The last byte is set if the function // depends on the input length. -// "CN X Y Z": tells that a function N has basic blocks X, Y, and Z covered -// in addition to the function's entry block. +// "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered +// in addition to the function's entry block, out of T total instrumented +// blocks. // //===----------------------------------------------------------------------===*/ @@ -87,6 +88,10 @@ enum { PCFLAG_FUNC_ENTRY = 1, }; +static inline bool BlockIsEntry(size_t BlockIdx) { + return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY; +} + // Prints all instrumented functions. static int PrintFunctions() { // We don't have the symbolizer integrated with dfsan yet. @@ -99,8 +104,7 @@ static int PrintFunctions() { "| sed 's/dfs\\$//g'", "w"); for (size_t I = 0; I < NumGuards; I++) { uintptr_t PC = PCsBeg[I * 2]; - uintptr_t PCFlags = PCsBeg[I * 2 + 1]; - if (!(PCFlags & PCFLAG_FUNC_ENTRY)) continue; + if (!BlockIsEntry(I)) continue; void *const Buf[1] = {(void*)PC}; backtrace_symbols_fd(Buf, 1, fileno(Pipe)); } @@ -142,23 +146,22 @@ static void PrintDataFlow(FILE *Out) { static void PrintCoverage(FILE *Out) { ssize_t CurrentFuncGuard = -1; ssize_t CurrentFuncNum = -1; - int NumFuncsCovered = 0; - for (size_t I = 0; I < NumGuards; I++) { - bool IsEntry = PCsBeg[I * 2 + 1] & PCFLAG_FUNC_ENTRY; - if (IsEntry) { - CurrentFuncNum++; - CurrentFuncGuard = I; - } - if (!BBExecuted[I]) continue; - if (IsEntry) { - if (NumFuncsCovered) fprintf(Out, "\n"); - fprintf(Out, "C%zd ", CurrentFuncNum); - NumFuncsCovered++; - } else { - fprintf(Out, "%zd ", I - CurrentFuncGuard); + ssize_t NumBlocksInCurrentFunc = -1; + for (size_t FuncBeg = 0; FuncBeg < NumGuards;) { + CurrentFuncNum++; + assert(BlockIsEntry(FuncBeg)); + size_t FuncEnd = FuncBeg + 1; + for (; FuncEnd < NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++) + ; + if (BBExecuted[FuncBeg]) { + fprintf(Out, "C%zd", CurrentFuncNum); + for (size_t I = FuncBeg + 1; I < FuncEnd; I++) + if (BBExecuted[I]) + fprintf(Out, " %zd", I - FuncBeg); + fprintf(Out, " %zd\n", FuncEnd - FuncBeg); } + FuncBeg = FuncEnd; } - fprintf(Out, "\n"); } int main(int argc, char **argv) { @@ -229,7 +232,7 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, PCsEnd = pcs_end; assert(NumGuards == (PCsEnd - PCsBeg) / 2); for (size_t i = 0; i < NumGuards; i++) { - if (PCsBeg[i * 2 + 1] & PCFLAG_FUNC_ENTRY) { + if (BlockIsEntry(i)) { NumFuncs++; GuardsBeg[i] = NumFuncs; } diff --git a/compiler-rt/lib/fuzzer/scripts/merge_data_flow.py b/compiler-rt/lib/fuzzer/scripts/merge_data_flow.py index d6000fa..9f69018 100755 --- a/compiler-rt/lib/fuzzer/scripts/merge_data_flow.py +++ b/compiler-rt/lib/fuzzer/scripts/merge_data_flow.py @@ -22,20 +22,37 @@ def Merge(a, b): def main(argv): D = {} + C = {} + # read the lines. for line in fileinput.input(): + # collect the coverage. if line.startswith('C'): + COV = line.strip().split(' ') + F = COV[0]; + if not F in C: + C[F] = {0} + for B in COV[1:]: + C[F].add(int(B)) continue + # collect the data flow trace. [F,BV] = line.strip().split(' ') if F in D: D[F] = Merge(D[F], BV) else: D[F] = BV; + # print the combined data flow trace. for F in D.keys(): if isinstance(D[F], str): value = D[F] else: value = D[F].decode('utf-8') print("%s %s" % (F, value)) + # print the combined coverage + for F in C.keys(): + print("%s" % F, end="") + for B in list(C[F])[1:]: + print(" %s" % B, end="") + print() if __name__ == '__main__': main(sys.argv) diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test index 11f2fef..6f32c38 100644 --- a/compiler-rt/test/fuzzer/dataflow.test +++ b/compiler-rt/test/fuzzer/dataflow.test @@ -94,8 +94,8 @@ RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN % RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2' USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT -USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001| -USE_DATA_FLOW_TRACE-DGA: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011| +USE_DATA_FLOW_TRACE-DAG: ca8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001| +USE_DATA_FLOW_TRACE-DAG: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011| USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function # Test that we can run collect_data_flow on a long input (>2**16 bytes) -- 2.7.4