[sanitizer-coverage] Add script for coverage symbolization
author Dmitriy Nikiforov <d.nikiforov@partner.samsung.com>
Wed, 7 Jun 2017 17:54:21 +0000 (20:54 +0300)
committer Maria Guseva <m.guseva@samsung.com>
Tue, 11 Jul 2017 02:46:03 +0000 (11:46 +0900)
scripts/sancov_symbolize.py [new file with mode: 0755]

diff --git a/scripts/sancov_symbolize.py b/scripts/sancov_symbolize.py
new file mode 100755 (executable)
index 0000000..90490df
--- /dev/null
@@ -0,0 +1,243 @@
+#!/usr/bin/env python2
+"""
+Generates 'symcov' files for the coverage-report-server.py script.
+
+Prerequisites:
+  1. addr2line is present in your PATH.
+  2. sancov.py is present in your PATH.
+"""
+import subprocess
+import sys
+import collections
+import hashlib
+import json
+import re
+import os
+
+# Executable that prints / diffs SanitizerCoverage dumps; resolved via PATH.
+SANCOV_EXEC = 'sancov.py'
+# Executable that maps addresses to source locations; resolved via PATH.
+SYMBOLIZER_EXEC = 'addr2line'
+# Number of bytes read per chunk while hashing a binary.
+BUF_SIZE = 64 * 1024
+# Program name shown by usage(); overwritten with sys.argv[0] at start-up.
+PROG_NAME = ""
+
+# A single source position reported by the symbolizer.
+Location = collections.namedtuple('Location', 'filename fun line')
+# An instrumented point: its ID plus every Location attributed to it.
+CoveragePoint = collections.namedtuple('CoveragePoint', 'locs point_id')
+# Coverage of one binary: IDs of hit points, the binary hash, all points.
+SymbolizedCoverage = collections.namedtuple(
+    'SymbolizedCoverage', 'covered_ids binary_hash points')
+
+
+def usage():
+    """
+    Prints 'Usage' message to stderr and exits with status 1.
+
+    The program name in the message is taken from the module-level
+    PROG_NAME variable.
+    """
+    sys.stderr.write('usage: ' + PROG_NAME +
+                     ' DUMP BINARY [DUMP BINARY [...]]\n')
+    # sys.exit() instead of the bare exit() helper: the latter is injected
+    # by the 'site' module and is missing when Python runs with -S.
+    sys.exit(1)
+
+
+def which(binary):
+    """
+    Simple analogue to Python3 shutil.which().
+
+    Returns the path of the first executable regular file called 'binary'
+    found in the directories listed in the PATH environment variable, or
+    None when there is no such file.
+
+    Keyword arguments:
+    binary -- file name of the executable to look for.
+    """
+    # Fall back to the platform default search path when PATH is unset;
+    # otherwise splitting None would raise AttributeError.
+    search_path = os.environ.get('PATH', os.defpath)
+    for directory in search_path.split(os.pathsep):
+        candidate = os.path.join(directory, binary)
+        # isfile() rejects directories, which would also pass the X_OK check.
+        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+            return candidate
+    return None
+
+
+def print_symcov(args):
+    """
+    Builds the 'symcov' document and prints it to stdout as JSON.
+
+    For every (dump, binary) pair the binary is hashed with SHA-1 and its
+    coverage is symbolized; the combined result is serialized and written
+    as indented JSON with sorted keys, followed by a newline.
+
+    Keyword arguments:
+    args -- dictionary with paths to SanitizerCoverage dump files as keys and
+            paths to corresponding binaries as values.
+    """
+    symbolized = []
+    for dump_path, binary_path in args.items():
+        digest = hashlib.sha1()
+        with open(binary_path, 'rb') as stream:
+            # Feed the hash in BUF_SIZE chunks until read() returns nothing.
+            for chunk in iter(lambda: stream.read(BUF_SIZE), b''):
+                digest.update(chunk)
+        symbolized.append(
+            get_symbolized_coverage(binary_path, dump_path,
+                                    digest.hexdigest()))
+
+    document = json.dumps(
+        serialize_coverage(symbolized),
+        separators=(',', ': '),
+        indent=4,
+        sort_keys=True)
+    sys.stdout.write(document + '\n')
+
+
+def parse_symbolizer_output(cmd, regexp, binary_hash):
+    """
+    Executes command and parses its output.
+
+    Runs 'cmd' through the shell and parses its standard output line by
+    line using the provided regular expression. Lines that are empty or do
+    not match are skipped. Lines that yield the same point ID are merged
+    into a single CoveragePoint holding several locations, so every ID
+    appears exactly once in the result.
+
+    If the command cannot be started, or finishes with a non-zero status,
+    its error output is forwarded to stderr and the script exits.
+
+    Returns list of CoveragePoint objects (can be empty), in order of first
+    appearance.
+
+    Keyword arguments:
+    cmd         -- shell command line (may be a pipeline) to execute.
+    regexp      -- compiled regular expression; group 1 is the address,
+                   group 2 the function, group 3 the file name and group 4
+                   the line.
+    binary_hash -- hash of the binary. Its first five characters are used
+                   as the prefix of generated coverage point IDs.
+    """
+    try:
+        # 'cmd' is a pipeline string, so it has to go through the shell;
+        # the caller is responsible for quoting its arguments.
+        proc = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    except OSError as err:
+        # Popen reports start-up failures as OSError.
+        sys.stderr.write('{0}: {1}\n'.format(cmd, err))
+        sys.exit(1)
+
+    # communicate() drains both pipes and waits for the child. Reading only
+    # stdout could block forever once the unread stderr pipe fills up.
+    stdout_data, stderr_data = proc.communicate()
+    if proc.returncode != 0:
+        # NOTE: for a pipeline this is the status of the last command only.
+        sys.stderr.write(stderr_data.decode('utf-8', 'replace'))
+        sys.exit(proc.returncode)
+
+    points = []
+    # Index of already created points, to merge repeated IDs in O(1).
+    points_by_id = {}
+
+    for line in stdout_data.decode('utf-8', 'replace').splitlines():
+        line = line.strip()
+        if not line:
+            continue
+
+        match = regexp.match(line)
+        if match is None:
+            continue
+
+        point_id = binary_hash[:5] + '-' + match.group(1)
+        loc = Location(match.group(3), match.group(2), match.group(4))
+        point = points_by_id.get(point_id)
+        if point is None:
+            point = CoveragePoint([loc], point_id)
+            points_by_id[point_id] = point
+            # Only new points are appended; a known point just gains a
+            # location, otherwise it would be listed twice.
+            points.append(point)
+        else:
+            point.locs.append(loc)
+
+    return points
+
+
+def get_symbolized_coverage(binary, sancov_dump, binary_hash):
+    """
+    Creates SymbolizedCoverage object.
+
+    Symbolizes the addresses printed from the dump (covered points) and the
+    addresses reported as missing for the binary (not covered points) by
+    piping them into the symbolizer. Exits with status 1 if either helper
+    executable cannot be found in PATH.
+
+    Returns created SymbolizedCoverage object; its 'points' hold covered
+    points followed by not covered ones.
+
+    Keyword arguments:
+    binary      -- path to covered binary file.
+    sancov_dump -- path to SanitizerCoverage dump file corresponding to binary.
+    binary_hash -- hash of the binary.
+    """
+    # shlex.quote on Python 3, pipes.quote on Python 2.
+    try:
+        from shlex import quote
+    except ImportError:
+        from pipes import quote
+
+    for executable in (SANCOV_EXEC, SYMBOLIZER_EXEC):
+        if which(executable) is None:
+            sys.stderr.write(executable + ': no such file\n')
+            sys.exit(1)
+
+    # NOTE(review): '\w*' only matches plain identifiers, so lines with
+    # demangled C++ names or unknown ('??') functions appear to be skipped
+    # -- confirm that this is intended.
+    regexp = re.compile(r'^(0x[0-9a-fA-F]*): (\w*) at (.*):(\d*)$')
+
+    # The commands run through the shell, so paths are quoted to survive
+    # spaces and shell metacharacters.
+    dump_arg = quote(sancov_dump)
+    binary_arg = quote(binary)
+
+    cmd = '{0} print {1} | {2} -e {3} -afpC'.format(SANCOV_EXEC, dump_arg,
+                                                    SYMBOLIZER_EXEC,
+                                                    binary_arg)
+    covered_points = parse_symbolizer_output(cmd, regexp, binary_hash)
+
+    cmd = '{0} print {1} | {0} missing {3} | {2} -e {3} -afpC'.format(
+        SANCOV_EXEC, dump_arg, SYMBOLIZER_EXEC, binary_arg)
+    not_covered_points = parse_symbolizer_output(cmd, regexp, binary_hash)
+
+    covered_ids = [point.point_id for point in covered_points]
+
+    return SymbolizedCoverage(covered_ids, binary_hash,
+                              covered_points + not_covered_points)
+
+
+def serialize_coverage(coverages):
+    """
+    Turns a list of SymbolizedCoverage into a JSON-ready dictionary.
+
+    The result has three keys: 'binary-hash' (hash of every binary),
+    'covered-points' (IDs of all covered points) and 'point-symbol-info'
+    (all points, serialized by serialize_all_points()).
+
+    Returns serialized object.
+
+    Keyword arguments:
+    coverages -- list of SymbolizedCoverage objects.
+    """
+    hashes = []
+    covered = []
+    merged_points = []
+    for item in coverages:
+        covered.extend(item.covered_ids)
+        hashes.append(item.binary_hash)
+        merged_points.extend(item.points)
+
+    return {
+        'binary-hash': hashes,
+        'covered-points': covered,
+        'point-symbol-info': serialize_all_points(merged_points),
+    }
+
+
+def serialize_all_points(coverage_points):
+    """
+    Serializes list of CoveragePoint to JSON format.
+
+    Builds a nested dictionary of the form
+    {filename: {function: {point_id: line}}}. A point is listed under each
+    (filename, function) pair that one of its locations refers to. When
+    several locations compete for the same entry, the first one met (in
+    point order, then location order) provides the line.
+
+    Returns serialized object.
+
+    Keyword arguments:
+    coverage_points -- list of CoveragePoint objects.
+    """
+    serialized = {}
+    for point in coverage_points:
+        for loc in point.locs:
+            # Entries are created only for the file and function a location
+            # actually names, so no empty function dictionaries show up
+            # under unrelated files.
+            by_function = serialized.setdefault(loc.filename, {})
+            by_point_id = by_function.setdefault(loc.fun, {})
+            # setdefault() keeps the first line recorded for an ID.
+            by_point_id.setdefault(point.point_id, loc.line)
+
+    return serialized
+
+
+# Script entry point: arguments are DUMP BINARY pairs.
+if __name__ == '__main__':
+    PROG_NAME = sys.argv[0]
+    ARGC = len(sys.argv)
+    # At least one pair is needed, and every dump needs a binary, so the
+    # total argument count (program name included) must be odd.
+    if ARGC < 3 or ARGC % 2 == 0:
+        usage()
+
+    # Odd positions are dump files, even positions are their binaries.
+    SANCOV_DUMPS = [os.path.abspath(arg) for arg in sys.argv[1::2]]
+    BINARIES = [os.path.abspath(arg) for arg in sys.argv[2::2]]
+
+    print_symcov(dict(zip(SANCOV_DUMPS, BINARIES)))