From c98872e3a3c0fc4390e0892866d7e844df929179 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko@apple.com>
Date: Thu, 14 May 2020 13:31:01 +0300
Subject: [PATCH] [analyzer] Modernize analyzer's Python scripts

Summary:
Fix opening text files in binary mode for reading/writing, which crashes
under Python 3. Additionally, clean up code that is redundant in Python 3
and fix a handful of flake8 warnings.

Differential Revision: https://reviews.llvm.org/D79932
---
 clang/utils/analyzer/CmpRuns.py                 | 30 ++++++++-----
 clang/utils/analyzer/SATestAdd.py               | 27 ++++++-----
 clang/utils/analyzer/SATestBuild.py             | 27 ++++++-----
 clang/utils/analyzer/SATestUpdateDiffs.py       | 13 +++---
 clang/utils/analyzer/SATestUtils.py             |  1 +
 clang/utils/analyzer/SumTimerInfo.py            |  4 +-
 clang/utils/analyzer/exploded-graph-rewriter.py | 60 ++++++++-----------------
 7 files changed, 78 insertions(+), 84 deletions(-)

diff --git a/clang/utils/analyzer/CmpRuns.py b/clang/utils/analyzer/CmpRuns.py
index 3fab6ef..28e9258 100755
--- a/clang/utils/analyzer/CmpRuns.py
+++ b/clang/utils/analyzer/CmpRuns.py
@@ -39,7 +39,8 @@ import sys
 
 STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)
 
-class Colors(object):
+
+class Colors:
     """
     Color for terminal highlight.
     """
@@ -47,18 +48,21 @@ class Colors(object):
     GREEN = '\x1b[6;30;42m'
     CLEAR = '\x1b[0m'
 
-# Information about analysis run:
-# path - the analysis output directory
-# root - the name of the root directory, which will be disregarded when
-# determining the source file name
-class SingleRunInfo(object):
+
+class SingleRunInfo:
+    """
+    Information about analysis run:
+    path - the analysis output directory
+    root - the name of the root directory, which will be disregarded when
+    determining the source file name
+    """
     def __init__(self, path, root="", verboseLog=None):
         self.path = path
         self.root = root.rstrip("/\\")
         self.verboseLog = verboseLog
 
 
-class AnalysisDiagnostic(object):
+class AnalysisDiagnostic:
     def __init__(self, data, report, htmlReport):
         self._data = data
         self._loc = self._data['location']
@@ -80,7 +84,7 @@ class AnalysisDiagnostic(object):
         p = path[0]
         if 'location' in p:
             fIdx = p['location']['file']
-        else: # control edge
+        else:  # control edge
             fIdx = path[0]['edges'][0]['start'][0]['file']
         out = self._report.files[fIdx]
         root = self._report.run.root
@@ -139,14 +143,14 @@ class AnalysisDiagnostic(object):
         return self._data
 
 
-class AnalysisReport(object):
+class AnalysisReport:
     def __init__(self, run, files):
         self.run = run
         self.files = files
         self.diagnostics = []
 
 
-class AnalysisRun(object):
+class AnalysisRun:
     def __init__(self, info):
         self.path = info.path
         self.root = info.root
@@ -303,12 +307,14 @@ def compareResults(A, B, opts):
 
     return res
 
+
 def computePercentile(l, percentile):
     """
     Return computed percentile.
     """
     return sorted(l)[int(round(percentile * len(l) + 0.5)) - 1]
 
+
 def deriveStats(results):
     # Assume all keys are the same in each statistics bucket.
     combined_data = defaultdict(list)
@@ -355,6 +361,7 @@ def compareStats(resultsA, resultsB):
                         report = Colors.RED + report + Colors.CLEAR
             print("\t %s %s" % (kkey, report))
 
+
 def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
                              Stdout=sys.stdout):
     # Load the run results.
@@ -367,7 +374,7 @@ def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
 
     # Open the verbose log, if given.
     if opts.verboseLog:
-        auxLog = open(opts.verboseLog, "wb")
+        auxLog = open(opts.verboseLog, "w")
     else:
         auxLog = None
 
@@ -405,6 +412,7 @@ def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
 
     return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
 
+
 def generate_option_parser():
     parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
     parser.add_option("", "--rootA", dest="rootA",
diff --git a/clang/utils/analyzer/SATestAdd.py b/clang/utils/analyzer/SATestAdd.py
index 52089f4..e0e267b 100755
--- a/clang/utils/analyzer/SATestAdd.py
+++ b/clang/utils/analyzer/SATestAdd.py
@@ -52,8 +52,8 @@ import sys
 
 def isExistingProject(PMapFile, projectID):
     PMapReader = csv.reader(PMapFile)
-    for I in PMapReader:
-        if projectID == I[0]:
+    for ProjectInfo in PMapReader:
+        if projectID == ProjectInfo[0]:
             return True
     return False
 
@@ -71,21 +71,24 @@ def addNewProject(ID, BuildMode):
         sys.exit(-1)
 
     # Build the project.
+    # TODO: Repair this call.  We give it the wrong number of arguments and it
+    #       is not trivial to construct argparse arguments in here.
+    #       Requires refactoring of the 'testProject' function.
     SATestBuild.testProject(ID, BuildMode, IsReferenceBuild=True)
 
     # Add the project ID to the project map.
     ProjectMapPath = os.path.join(CurDir, SATestBuild.ProjectMapFile)
 
     if os.path.exists(ProjectMapPath):
-        FileMode = "r+b"
+        FileMode = "r+"
     else:
         print("Warning: Creating the Project Map file!!")
-        FileMode = "w+b"
+        FileMode = "w+"
 
     with open(ProjectMapPath, FileMode) as PMapFile:
         if (isExistingProject(PMapFile, ID)):
-            print('Warning: Project with ID \'', ID, \
-                                 '\' already exists.', file=sys.stdout)
+            print('Warning: Project with ID \'', ID,
+                  '\' already exists.', file=sys.stdout)
             print("Reference output has been regenerated.", file=sys.stdout)
         else:
             PMapWriter = csv.writer(PMapFile)
@@ -97,12 +100,12 @@ def addNewProject(ID, BuildMode):
 # TODO: Set the path to the Repository directory.
 if __name__ == '__main__':
     if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'):
-        print('Add a new project for testing to the analyzer'\
-                             '\nUsage: ', sys.argv[0],\
-                             'project_ID <mode>\n' \
-                             'mode: 0 for single file project, ' \
-                             '1 for scan_build, ' \
-                             '2 for single file c++11 project', file=sys.stderr)
+        print('Add a new project for testing to the analyzer'
+              '\nUsage: ', sys.argv[0],
+              'project_ID <mode>\n'
+              'mode: 0 for single file project, '
+              '1 for scan_build, '
+              '2 for single file c++11 project', file=sys.stderr)
         sys.exit(-1)
 
     BuildMode = 1
diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index 42b2658..7ca0544 100755
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -75,7 +75,8 @@ logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
 
-class StreamToLogger(object):
+
+class StreamToLogger:
     def __init__(self, logger, log_level=logging.INFO):
         self.logger = logger
         self.log_level = log_level
@@ -377,7 +378,7 @@ def runAnalyzePreprocessed(Args, Dir, SBOutputDir, Mode):
         # Build and call the analyzer command.
         OutputOption = "-o '%s.plist' " % os.path.join(PlistPath, FileName)
         Command = CmdPrefix + OutputOption + ("'%s'" % FileName)
-        LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
+        LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+")
         try:
             if Verbose == 1:
                 Local.stdout.write("  Executing: %s\n" % (Command,))
@@ -432,7 +433,7 @@ def buildProject(Args, Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
 
     # Build and analyze the project.
-    with open(BuildLogPath, "wb+") as PBuildLogFile:
+    with open(BuildLogPath, "w+") as PBuildLogFile:
         if (ProjectBuildMode == 1):
             downloadAndPatch(Dir, PBuildLogFile)
             runCleanupScript(Dir, PBuildLogFile)
@@ -646,7 +647,7 @@ class TestProjectThread(threading.Thread):
         self.TasksQueue = TasksQueue
         self.ResultsDiffer = ResultsDiffer
         self.FailureFlag = FailureFlag
-        super(TestProjectThread, self).__init__()
+        super().__init__()
 
         # Needed to gracefully handle interrupts with Ctrl-C
         self.daemon = True
@@ -700,7 +701,7 @@ def testProject(Args, ID, ProjectBuildMode, IsReferenceBuild=False, Strictness=0
 
 
 def projectFileHandler():
-    return open(getProjectMapPath(), "rb")
+    return open(getProjectMapPath(), "r")
 
 
 def iterateOverProjects(PMapFile):
@@ -709,25 +710,26 @@ def iterateOverProjects(PMapFile):
     from the start.
     """
     PMapFile.seek(0)
-    for I in csv.reader(PMapFile):
-        if (SATestUtils.isCommentCSVLine(I)):
+    for ProjectInfo in csv.reader(PMapFile):
+        if (SATestUtils.isCommentCSVLine(ProjectInfo)):
             continue
-        yield I
+        yield ProjectInfo
 
 
 def validateProjectFile(PMapFile):
     """
     Validate project file.
     """
-    for I in iterateOverProjects(PMapFile):
-        if len(I) != 2:
+    for ProjectInfo in iterateOverProjects(PMapFile):
+        if len(ProjectInfo) != 2:
             print("Error: Rows in the ProjectMapFile should have 2 entries.")
             raise Exception()
-        if I[1] not in ('0', '1', '2'):
-            print("Error: Second entry in the ProjectMapFile should be 0" \
+        if ProjectInfo[1] not in ('0', '1', '2'):
+            print("Error: Second entry in the ProjectMapFile should be 0"
                   " (single file), 1 (project), or 2(single file c++11).")
             raise Exception()
 
+
 def singleThreadedTestAll(Args, ProjectsToTest):
     """
     Run all projects.
@@ -738,6 +740,7 @@ def singleThreadedTestAll(Args, ProjectsToTest):
         Success &= testProject(Args, *ProjArgs)
     return Success
 
+
 def multiThreadedTestAll(Args, ProjectsToTest, Jobs):
     """
     Run each project in a separate thread.
diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
index ea3c08c..70f54d8 100755
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -31,14 +31,14 @@ def updateReferenceResults(ProjName, ProjBuildMode):
         SATestBuild.getSBOutputDirName(IsReferenceBuild=False))
 
     if not os.path.exists(CreatedResultsPath):
-        print("New results not found, was SATestBuild.py "\
-                             "previously run?", file=sys.stderr)
+        print("New results not found, was SATestBuild.py "
+              "previously run?", file=sys.stderr)
         sys.exit(1)
 
     BuildLogPath = SATestBuild.getBuildLogPath(RefResultsPath)
     Dirname = os.path.dirname(os.path.abspath(BuildLogPath))
     runCmd("mkdir -p '%s'" % Dirname)
-    with open(BuildLogPath, "wb+") as PBuildLogFile:
+    with open(BuildLogPath, "w+") as PBuildLogFile:
         # Remove reference results: in git, and then again for a good measure
         # with rm, as git might not remove things fully if there are empty
         # directories involved.
@@ -63,9 +63,10 @@ def updateReferenceResults(ProjName, ProjBuildMode):
 
 def main(argv):
     if len(argv) == 2 and argv[1] in ('-h', '--help'):
-        print("Update static analyzer reference results based "\
-                             "\non the previous run of SATestBuild.py.\n"\
-                             "\nN.B.: Assumes that SATestBuild.py was just run", file=sys.stderr)
+        print("Update static analyzer reference results based "
+              "\non the previous run of SATestBuild.py.\n"
+              "\nN.B.: Assumes that SATestBuild.py was just run",
+              file=sys.stderr)
         sys.exit(1)
 
     with SATestBuild.projectFileHandler() as f:
diff --git a/clang/utils/analyzer/SATestUtils.py b/clang/utils/analyzer/SATestUtils.py
index 0ed4a4b..079b06a 100644
--- a/clang/utils/analyzer/SATestUtils.py
+++ b/clang/utils/analyzer/SATestUtils.py
@@ -5,6 +5,7 @@ import sys
 
 Verbose = 1
 
+
 def which(command, paths=None):
     """which(command, [paths]) - Look up the given command in the paths string
     (or the PATH environment variable, if unspecified)."""
diff --git a/clang/utils/analyzer/SumTimerInfo.py b/clang/utils/analyzer/SumTimerInfo.py
index 36e519a..5d86f76 100644
--- a/clang/utils/analyzer/SumTimerInfo.py
+++ b/clang/utils/analyzer/SumTimerInfo.py
@@ -12,8 +12,8 @@ import sys
 
 if __name__ == '__main__':
     if len(sys.argv) < 2:
-        print('Usage: ', sys.argv[0],\
-                             'scan_build_output_file', file=sys.stderr)
+        print('Usage: ', sys.argv[0],
+              'scan_build_output_file', file=sys.stderr)
         sys.exit(-1)
 
     f = open(sys.argv[1], 'r')
diff --git a/clang/utils/analyzer/exploded-graph-rewriter.py b/clang/utils/analyzer/exploded-graph-rewriter.py
index f47be59..bae863c 100755
--- a/clang/utils/analyzer/exploded-graph-rewriter.py
+++ b/clang/utils/analyzer/exploded-graph-rewriter.py
@@ -34,7 +34,7 @@ def diff_dicts(curr, prev):
 
 
 # Represents any program state trait that is a dictionary of key-value pairs.
-class GenericMap(object):
+class GenericMap:
     def __init__(self, items):
         self.generic_map = collections.OrderedDict(items)
 
@@ -47,9 +47,8 @@ class GenericMap(object):
 
 
 # A deserialized source location.
-class SourceLocation(object):
+class SourceLocation:
     def __init__(self, json_loc):
-        super(SourceLocation, self).__init__()
         logging.debug('json: %s' % json_loc)
         self.line = json_loc['line']
         self.col = json_loc['column']
@@ -63,9 +62,8 @@ class SourceLocation(object):
 
 
 # A deserialized program point.
-class ProgramPoint(object):
+class ProgramPoint:
     def __init__(self, json_pp):
-        super(ProgramPoint, self).__init__()
         self.kind = json_pp['kind']
         self.tag = json_pp['tag']
         self.node_id = json_pp['node_id']
@@ -90,9 +88,8 @@ class ProgramPoint(object):
 
 
 # A single expression acting as a key in a deserialized Environment.
-class EnvironmentBindingKey(object):
+class EnvironmentBindingKey:
     def __init__(self, json_ek):
-        super(EnvironmentBindingKey, self).__init__()
         # CXXCtorInitializer is not a Stmt!
         self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
             else json_ek['init_id']
@@ -110,9 +107,8 @@ class EnvironmentBindingKey(object):
 
 
 # Deserialized description of a location context.
-class LocationContext(object):
+class LocationContext:
     def __init__(self, json_frame):
-        super(LocationContext, self).__init__()
         self.lctx_id = json_frame['lctx_id']
         self.caption = json_frame['location_context']
         self.decl = json_frame['calling']
@@ -131,9 +127,8 @@ class LocationContext(object):
 
 # A group of deserialized Environment bindings that correspond to a specific
 # location context.
-class EnvironmentFrame(object):
+class EnvironmentFrame:
     def __init__(self, json_frame):
-        super(EnvironmentFrame, self).__init__()
         self.location_context = LocationContext(json_frame)
         self.bindings = collections.OrderedDict(
             [(EnvironmentBindingKey(b),
@@ -150,9 +145,8 @@ class EnvironmentFrame(object):
 
 # A deserialized Environment. This class can also hold other entities that
 # are similar to Environment, such as Objects Under Construction.
-class GenericEnvironment(object):
+class GenericEnvironment:
     def __init__(self, json_e):
-        super(GenericEnvironment, self).__init__()
         self.frames = [EnvironmentFrame(f) for f in json_e]
 
     def diff_frames(self, prev):
@@ -181,9 +175,8 @@ class GenericEnvironment(object):
 
 
 # A single binding key in a deserialized RegionStore cluster.
-class StoreBindingKey(object):
+class StoreBindingKey:
     def __init__(self, json_sk):
-        super(StoreBindingKey, self).__init__()
         self.kind = json_sk['kind']
         self.offset = json_sk['offset']
 
@@ -198,9 +191,8 @@ class StoreBindingKey(object):
 
 
 # A single cluster of the deserialized RegionStore.
-class StoreCluster(object):
+class StoreCluster:
     def __init__(self, json_sc):
-        super(StoreCluster, self).__init__()
         self.base_region = json_sc['cluster']
         self.bindings = collections.OrderedDict(
             [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
@@ -214,9 +206,8 @@ class StoreCluster(object):
 
 
 # A deserialized RegionStore.
-class Store(object):
+class Store:
     def __init__(self, json_s):
-        super(Store, self).__init__()
         self.ptr = json_s['pointer']
         self.clusters = collections.OrderedDict(
             [(c['pointer'], StoreCluster(c)) for c in json_s['items']])
@@ -235,9 +226,8 @@ class Store(object):
 
 # Deserialized messages from a single checker in a single program state.
 # Basically a list of raw strings.
-class CheckerLines(object):
+class CheckerLines:
     def __init__(self, json_lines):
-        super(CheckerLines, self).__init__()
         self.lines = json_lines
 
     def diff_lines(self, prev):
@@ -250,9 +240,8 @@ class CheckerLines(object):
 
 
 # Deserialized messages of all checkers, separated by checker.
-class CheckerMessages(object):
+class CheckerMessages:
     def __init__(self, json_m):
-        super(CheckerMessages, self).__init__()
         self.items = collections.OrderedDict(
             [(m['checker'], CheckerLines(m['messages'])) for m in json_m])
 
@@ -269,9 +258,8 @@ class CheckerMessages(object):
 
 
 # A deserialized program state.
-class ProgramState(object):
+class ProgramState:
     def __init__(self, state_id, json_ps):
-        super(ProgramState, self).__init__()
         logging.debug('Adding ProgramState ' + str(state_id))
 
         if json_ps is None:
@@ -315,9 +303,8 @@ class ProgramState(object):
 # A deserialized exploded graph node. Has a default constructor because it
 # may be referenced as part of an edge before its contents are deserialized,
 # and in this moment we already need a room for predecessors and successors.
-class ExplodedNode(object):
+class ExplodedNode:
     def __init__(self):
-        super(ExplodedNode, self).__init__()
         self.predecessors = []
         self.successors = []
 
@@ -338,7 +325,7 @@ class ExplodedNode(object):
 
 # A deserialized ExplodedGraph. Constructed by consuming a .dot file
 # line-by-line.
-class ExplodedGraph(object):
+class ExplodedGraph:
     # Parse .dot files with regular expressions.
     node_re = re.compile(
         '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
@@ -346,7 +333,6 @@ class ExplodedGraph(object):
         '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')
 
     def __init__(self):
-        super(ExplodedGraph, self).__init__()
         self.nodes = collections.defaultdict(ExplodedNode)
         self.root_id = None
         self.incomplete_line = ''
@@ -407,10 +393,9 @@ class ExplodedGraph(object):
 
 # A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
 # syntax highlighing.
-class DotDumpVisitor(object):
+class DotDumpVisitor:
     def __init__(self, do_diffs, dark_mode, gray_mode,
                  topo_mode, dump_dot_only):
-        super(DotDumpVisitor, self).__init__()
         self._do_diffs = do_diffs
         self._dark_mode = dark_mode
         self._gray_mode = gray_mode
@@ -896,10 +881,7 @@ class DotDumpVisitor(object):
 
 
 # BasicExplorer explores the whole graph in no particular order.
-class BasicExplorer(object):
-    def __init__(self):
-        super(BasicExplorer, self).__init__()
-
+class BasicExplorer:
     def explore(self, graph, visitor):
         visitor.visit_begin_graph(graph)
         for node in sorted(graph.nodes):
@@ -919,10 +901,7 @@ class BasicExplorer(object):
 
 # SinglePathTrimmer keeps only a single path - the leftmost path from the root.
 # Useful when the trimmed graph is still too large.
-class SinglePathTrimmer(object):
-    def __init__(self):
-        super(SinglePathTrimmer, self).__init__()
-
+class SinglePathTrimmer:
     def trim(self, graph):
         visited_nodes = set()
         node_id = graph.root_id
@@ -946,9 +925,8 @@ class SinglePathTrimmer(object):
 # TargetedTrimmer keeps paths that lead to specific nodes and discards all
 # other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
 # a crash).
-class TargetedTrimmer(object):
+class TargetedTrimmer:
     def __init__(self, target_nodes):
-        super(TargetedTrimmer, self).__init__()
         self._target_nodes = target_nodes
 
     @staticmethod
-- 
2.7.4