Add '--debug-dirs' option to sancov_symbolize.py
author Dmitriy Nikiforov <d.nikiforov@partner.samsung.com>
Fri, 9 Jun 2017 16:27:28 +0000 (19:27 +0300)
committer Maria Guseva <m.guseva@samsung.com>
Tue, 11 Jul 2017 02:46:03 +0000 (11:46 +0900)
Added the '--debug-dirs' option for specifying paths to directories with
.debug files containing debug information for the specified binaries.

Changes:
  - Moved argument-parsing logic to a new function 'parse_args'
  - print_symcov() now accepts a list of tuples as its argument instead of a dict
  - Added 'debug_info' argument to get_symbolized_coverage()
  - Updated usage message

scripts/sancov_symbolize.py

index 90490df244e8555e293349a4282e380eb36c259c..5d2966aa678910b3be0313d723b79f878d8f313d 100755 (executable)
@@ -13,6 +13,7 @@ import hashlib
 import json
 import re
 import os
+from os.path import realpath, isfile, basename
 
 SANCOV_EXEC = 'sancov.py'
 SYMBOLIZER_EXEC = 'addr2line'
@@ -29,8 +30,11 @@ def usage():
     """
     Prints 'Usage' message and exits with error.
     """
-    sys.stderr.write('usage: ' + PROG_NAME +
-                     ' DUMP BINARY [DUMP BINARY [...]]\n')
+    sys.stderr.write(
+        "usage: {0} [OPTIONS] DUMP BINARY [DUMP BINARY [...]]\n"
+        "options:\n"
+        "  --debug-dirs  list of colon-separated paths to directories with .debug files\n".
+        format(PROG_NAME))
     exit(1)
 
 
@@ -50,11 +54,11 @@ def print_symcov(args):
     Generates and writes 'symcov' file to stdout.
 
     Keyword arguments:
-    args -- dictionary with paths to SanitizerCoverage dump files as keys and
-            paths to corresponding binaries as values.
+    args -- list of tuples in following format:
+            [(sancov, binary, debug_info), ...]
     """
     coverages = []
-    for sancov_dump, binary in args.items():
+    for sancov_dump, binary, debug_info in args:
         sha1 = hashlib.sha1()
         with open(binary, 'rb') as binary_file:
             while True:
@@ -63,7 +67,8 @@ def print_symcov(args):
                     break
                 sha1.update(data)
         coverages.append(
-            get_symbolized_coverage(binary, sancov_dump, sha1.hexdigest()))
+            get_symbolized_coverage(binary, sancov_dump,
+                                    sha1.hexdigest(), debug_info))
 
     serialized_coverage = serialize_coverage(coverages)
     sys.stdout.write(
@@ -119,7 +124,7 @@ def parse_symbolizer_output(cmd, regexp, binary_hash):
     return points
 
 
-def get_symbolized_coverage(binary, sancov_dump, binary_hash):
+def get_symbolized_coverage(binary, sancov_dump, binary_hash, debug_info):
     """
     Creates SymbolizedCoverage object.
 
@@ -129,6 +134,7 @@ def get_symbolized_coverage(binary, sancov_dump, binary_hash):
     binary      -- path to covered binary file.
     sancov_dump -- path to SanitizerCoverage dump file corresponding to binary.
     binary_hash -- hash of the binary.
+    debug_info  -- path to debug info file. Can be the same as 'binary'.
     """
     if which(SANCOV_EXEC) is None:
         sys.stderr.write(SANCOV_EXEC + ': no such file\n')
@@ -139,12 +145,12 @@ def get_symbolized_coverage(binary, sancov_dump, binary_hash):
 
     regexp = re.compile(r'^(0x[0-9a-fA-F]*): (\w*) at (.*):(\d*)$')
 
-    cmd = '{0} print {1} | {2} -e {3} -afpC'.format(SANCOV_EXEC, sancov_dump,
-                                                    SYMBOLIZER_EXEC, binary)
+    cmd = '{0} print {1} | {2} -e {3} -afpC'.format(
+        SANCOV_EXEC, sancov_dump, SYMBOLIZER_EXEC, debug_info)
     covered_points = parse_symbolizer_output(cmd, regexp, binary_hash)
 
-    cmd = '{0} print {1} | {0} missing {3} | {2} -e {3} -afpC'.format(
-        SANCOV_EXEC, sancov_dump, SYMBOLIZER_EXEC, binary)
+    cmd = '{0} print {1} | {0} missing {3} | {2} -e {4} -afpC'.format(
+        SANCOV_EXEC, sancov_dump, SYMBOLIZER_EXEC, binary, debug_info)
     not_covered_points = parse_symbolizer_output(cmd, regexp, binary_hash)
 
     covered_ids = [point.point_id for point in covered_points]
@@ -228,16 +234,55 @@ def serialize_all_points(coverage_points):
     return serialized
 
 
-if __name__ == '__main__':
-    PROG_NAME = sys.argv[0]
-    ARGC = len(sys.argv)
-    if ARGC < 3 or ARGC % 2 != 1:
+def parse_args(args):
+    """
+    Parses command line arguments.
+
+    Keyword arguments:
+    args -- command line arguments stripped of the executable name.
+    """
+    argc = len(args)
+    if argc == 0:
+        usage()
+
+    debug_info = []
+    path_index = 0
+    if args[0].startswith("--debug-dirs"):
+        index = args[0].find("=") + 1
+        if index > 0:
+            debug_dirs = args[0][index:].split(":")
+            path_index = 1
+        else:
+            debug_dirs = args[1].split(":")
+            path_index = 2
+        debug_info = [
+            debug_dir + "/" + f
+            for debug_dir in debug_dirs for f in os.listdir(debug_dir)
+            if isfile(debug_dir + "/" + f)
+        ]
+
+    if (argc - path_index) % 2 != 0:
         usage()
 
-    SANCOV_DUMPS = []
-    BINARIES = []
-    for i in range(1, ARGC, 2):
-        SANCOV_DUMPS.append(os.path.abspath(sys.argv[i]))
-        BINARIES.append(os.path.abspath(sys.argv[i + 1]))
+    sancov_dumps = []
+    binaries = []
+    filtered_debug_info = []
+
+    for i in range(path_index, argc, 2):
+        sancov_dumps.append(realpath(args[i]))
+        binaries.append(realpath(args[i + 1]))
+        # TODO: replace basic filenames comparison check with
+        # binary debug-link check
+        for debug_file in debug_info:
+            if basename(binaries[-1]) + ".debug" == basename(debug_file):
+                filtered_debug_info.append(debug_file)
+                break
+        if len(filtered_debug_info) < len(binaries):
+            filtered_debug_info.append(binaries[-1])
+
+    print_symcov(zip(sancov_dumps, binaries, filtered_debug_info))
 
-    print_symcov(dict(zip(SANCOV_DUMPS, BINARIES)))
+
+if __name__ == '__main__':
+    PROG_NAME = sys.argv[0]
+    parse_args(sys.argv[1:])