#!/usr/bin/env python2
# Origin: tools/libFuzzer commit f0b9ec04d64e89d34a38727ecfacee0132379416,
# "[sanitizer-coverage] Add script for coverage symbolization"
# (Dmitriy Nikiforov, 2017-06-07), file scripts/sancov_symbolize.py.
"""
Script for 'symcov' files generation for coverage-report-server.py script.

Pre-requisites:
    1. addr2line present in your PATH.
    2. sancov.py present in your PATH.
"""
import collections
import hashlib
import json
import os
import re
import subprocess
import sys

try:
    from shlex import quote as _shell_quote  # Python 3
except ImportError:
    from pipes import quote as _shell_quote  # Python 2

SANCOV_EXEC = 'sancov.py'
SYMBOLIZER_EXEC = 'addr2line'
BUF_SIZE = 65536
PROG_NAME = ""

# One line of `addr2line -afpC` output: "<addr>: <function> at <file>:<line>".
#   group 1 -- address, group 2 -- function, group 3 -- file, group 4 -- line.
# The function group accepts any text because '-C' demangles C++ names
# (e.g. "ns::Foo::bar(int)"), which are not plain identifiers.  Entries whose
# function is unknown ("??") are rejected.  A trailing " (discriminator N)"
# suffix is tolerated and ignored.
SYMBOLIZER_LINE_RE = re.compile(
    r'^(0x[0-9a-fA-F]*): (?!\?\? at )(.+?) at (.*):(\d*)'
    r'(?: \(discriminator \d+\))?$')

Location = collections.namedtuple('Location', ['filename', 'fun', 'line'])
CoveragePoint = collections.namedtuple('CoveragePoint', ['locs', 'point_id'])
SymbolizedCoverage = collections.namedtuple(
    'SymbolizedCoverage', ['covered_ids', 'binary_hash', 'points'])


def usage():
    """
    Prints 'Usage' message and exits with error.
    """
    sys.stderr.write('usage: ' + PROG_NAME +
                     ' DUMP BINARY [DUMP BINARY [...]]\n')
    sys.exit(1)


def which(binary):
    """
    Simple analogue to Python3 shutil.which().

    Returns path to the first executable regular file named 'binary' found
    in the directories listed in PATH, or None if there is no such file.

    Keyword arguments:
    binary -- name of the executable to look for.
    """
    # PATH may be unset altogether; fall back to the platform default
    # search path instead of failing on None.
    search_path = os.environ.get('PATH', os.defpath)
    for directory in search_path.split(os.path.pathsep):
        candidate = os.path.join(directory, binary)
        # isfile() rather than exists(): directories are usually "executable"
        # too, but they cannot be run.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None


def _sha1_of_file(path):
    """
    Returns hexadecimal SHA-1 digest of the file, read in BUF_SIZE chunks.
    """
    sha1 = hashlib.sha1()
    with open(path, 'rb') as hashed_file:
        for chunk in iter(lambda: hashed_file.read(BUF_SIZE), b''):
            sha1.update(chunk)
    return sha1.hexdigest()


def print_symcov(args):
    """
    Generates and writes 'symcov' file to stdout.

    Keyword arguments:
    args -- dictionary with paths to SanitizerCoverage dump files as keys and
            paths to corresponding binaries as values.
    """
    coverages = [
        get_symbolized_coverage(binary, sancov_dump, _sha1_of_file(binary))
        for sancov_dump, binary in args.items()
    ]

    serialized_coverage = serialize_coverage(coverages)
    sys.stdout.write(
        json.dumps(
            serialized_coverage,
            separators=(',', ': '),
            indent=4,
            sort_keys=True) + '\n')


def parse_symbolizer_output(cmd, regexp, binary_hash):
    """
    Executes command and parses its output.

    Runs process with specified command line arguments and parses its output
    using provided regular expression. Lines that are empty or do not match
    the expression are skipped. Every distinct point ID yields exactly one
    CoveragePoint; repeated IDs only add locations to the existing point.

    If the command exits with non-zero status, its stderr is forwarded to
    stderr and the script exits with the same status.

    Returns list of CoveragePoint objects (can be empty), ordered by first
    appearance in the output.

    Keyword arguments:
    cmd -- command line for subprocess (executed through the shell).
    regexp -- regular expression to use for subprocess output parsing. Its
              groups must be: 1 -- address, 2 -- function, 3 -- file name,
              4 -- line number.
    binary_hash -- hash of the binary. It will be used to generate IDs for
                   coverage points.
    """
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    # communicate() drains both pipes and waits for the child, so a chatty
    # stderr cannot block the pipeline and the exit status is available.
    out, err = proc.communicate()
    if proc.returncode != 0:
        # For a shell pipeline this is the status of its last command.
        sys.stderr.write(err.decode('utf-8', 'replace'))
        sys.exit(proc.returncode)

    # Ordered index by ID: O(1) lookup while keeping first-seen order.
    points_by_id = collections.OrderedDict()

    for line in out.decode('utf-8', 'replace').splitlines():
        line = line.strip()
        if not line:
            continue

        match = regexp.match(line)
        if match is None:
            continue

        point_id = binary_hash[:5] + '-' + match.group(1)
        loc = Location(match.group(3), match.group(2), match.group(4))
        point = points_by_id.get(point_id)
        if point is None:
            points_by_id[point_id] = CoveragePoint([loc], point_id)
        else:
            point.locs.append(loc)

    return list(points_by_id.values())


def get_symbolized_coverage(binary, sancov_dump, binary_hash):
    """
    Creates SymbolizedCoverage object.

    Exits with error if sancov.py or addr2line cannot be found in PATH.

    Returns created SymbolizedCoverage object.

    Keyword arguments:
    binary -- path to covered binary file.
    sancov_dump -- path to SanitizerCoverage dump file corresponding to binary.
    binary_hash -- hash of the binary.
    """
    for tool in (SANCOV_EXEC, SYMBOLIZER_EXEC):
        if which(tool) is None:
            sys.stderr.write(tool + ': no such file\n')
            sys.exit(1)

    # The commands below are run through the shell, so paths must be quoted
    # to survive spaces and shell metacharacters.
    quoted_dump = _shell_quote(sancov_dump)
    quoted_binary = _shell_quote(binary)

    cmd = '{0} print {1} | {2} -e {3} -afpC'.format(
        SANCOV_EXEC, quoted_dump, SYMBOLIZER_EXEC, quoted_binary)
    covered_points = parse_symbolizer_output(
        cmd, SYMBOLIZER_LINE_RE, binary_hash)

    cmd = '{0} print {1} | {0} missing {3} | {2} -e {3} -afpC'.format(
        SANCOV_EXEC, quoted_dump, SYMBOLIZER_EXEC, quoted_binary)
    not_covered_points = parse_symbolizer_output(
        cmd, SYMBOLIZER_LINE_RE, binary_hash)

    covered_ids = [point.point_id for point in covered_points]

    return SymbolizedCoverage(covered_ids, binary_hash,
                              covered_points + not_covered_points)


def serialize_coverage(coverages):
    """
    Serializes list of SymbolizedCoverage to JSON format.

    Converts SymbolizedCoverage objects to nested dictionaries and lists.

    Returns serialized object: dictionary with 'binary-hash' (list),
    'covered-points' (list) and 'point-symbol-info' (nested dictionary, see
    serialize_all_points()) keys.

    Keyword arguments:
    coverages -- list of SymbolizedCoverage objects.
    """
    binary_hashes = []
    covered_points = []
    all_points = []
    for coverage in coverages:
        covered_points.extend(coverage.covered_ids)
        binary_hashes.append(coverage.binary_hash)
        all_points.extend(coverage.points)

    return {
        'binary-hash': binary_hashes,
        'covered-points': covered_points,
        'point-symbol-info': serialize_all_points(all_points),
    }


def serialize_all_points(coverage_points):
    """
    Serializes list of CoveragePoint to JSON format.

    Converts CoveragePoint objects to nested dictionaries:
    file name -> function name -> point ID -> line.
    A function is listed under a file only if some point has a location in
    that very file and function. If a point ID has several locations in the
    same file and function, the first one encountered wins.

    Returns serialized object.

    Keyword arguments:
    coverage_points -- list of CoveragePoint objects.
    """
    serialized = {}
    for point in coverage_points:
        for loc in point.locs:
            lines_by_id = serialized.setdefault(
                loc.filename, {}).setdefault(loc.fun, {})
            # setdefault() keeps the line of the first location seen.
            lines_by_id.setdefault(point.point_id, loc.line)

    return serialized


def main(argv):
    """
    Script entry point.

    Keyword arguments:
    argv -- command line: program name followed by DUMP BINARY pairs.
    """
    global PROG_NAME
    PROG_NAME = argv[0]
    if len(argv) < 3 or len(argv) % 2 != 1:
        usage()

    sancov_dumps = [os.path.abspath(path) for path in argv[1::2]]
    binaries = [os.path.abspath(path) for path in argv[2::2]]

    # OrderedDict keeps the pairs in command line order on every Python
    # version, which makes the generated report deterministic.
    print_symcov(collections.OrderedDict(zip(sancov_dumps, binaries)))


if __name__ == '__main__':
    main(sys.argv)