From c98872e3a3c0fc4390e0892866d7e844df929179 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Thu, 14 May 2020 13:31:01 +0300 Subject: [PATCH] [analyzer] Modernize analyzer's Python scripts Summary: Fix read/write in binary format, which crashes Python 3. Additionally, clean up redundant (as for Python 3) code and fix a handful of flake8 warnings. Differential Revision: https://reviews.llvm.org/D79932 --- clang/utils/analyzer/CmpRuns.py | 30 ++++++++----- clang/utils/analyzer/SATestAdd.py | 27 ++++++----- clang/utils/analyzer/SATestBuild.py | 27 ++++++----- clang/utils/analyzer/SATestUpdateDiffs.py | 13 +++--- clang/utils/analyzer/SATestUtils.py | 1 + clang/utils/analyzer/SumTimerInfo.py | 4 +- clang/utils/analyzer/exploded-graph-rewriter.py | 60 ++++++++----------------- 7 files changed, 78 insertions(+), 84 deletions(-) diff --git a/clang/utils/analyzer/CmpRuns.py b/clang/utils/analyzer/CmpRuns.py index 3fab6ef..28e9258 100755 --- a/clang/utils/analyzer/CmpRuns.py +++ b/clang/utils/analyzer/CmpRuns.py @@ -39,7 +39,8 @@ import sys STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL) -class Colors(object): + +class Colors: """ Color for terminal highlight. 
""" @@ -47,18 +48,21 @@ class Colors(object): GREEN = '\x1b[6;30;42m' CLEAR = '\x1b[0m' -# Information about analysis run: -# path - the analysis output directory -# root - the name of the root directory, which will be disregarded when -# determining the source file name -class SingleRunInfo(object): + +class SingleRunInfo: + """ + Information about analysis run: + path - the analysis output directory + root - the name of the root directory, which will be disregarded when + determining the source file name + """ def __init__(self, path, root="", verboseLog=None): self.path = path self.root = root.rstrip("/\\") self.verboseLog = verboseLog -class AnalysisDiagnostic(object): +class AnalysisDiagnostic: def __init__(self, data, report, htmlReport): self._data = data self._loc = self._data['location'] @@ -80,7 +84,7 @@ class AnalysisDiagnostic(object): p = path[0] if 'location' in p: fIdx = p['location']['file'] - else: # control edge + else: # control edge fIdx = path[0]['edges'][0]['start'][0]['file'] out = self._report.files[fIdx] root = self._report.run.root @@ -139,14 +143,14 @@ class AnalysisDiagnostic(object): return self._data -class AnalysisReport(object): +class AnalysisReport: def __init__(self, run, files): self.run = run self.files = files self.diagnostics = [] -class AnalysisRun(object): +class AnalysisRun: def __init__(self, info): self.path = info.path self.root = info.root @@ -303,12 +307,14 @@ def compareResults(A, B, opts): return res + def computePercentile(l, percentile): """ Return computed percentile. """ return sorted(l)[int(round(percentile * len(l) + 0.5)) - 1] + def deriveStats(results): # Assume all keys are the same in each statistics bucket. combined_data = defaultdict(list) @@ -355,6 +361,7 @@ def compareStats(resultsA, resultsB): report = Colors.RED + report + Colors.CLEAR print("\t %s %s" % (kkey, report)) + def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True, Stdout=sys.stdout): # Load the run results. 
@@ -367,7 +374,7 @@ def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True, # Open the verbose log, if given. if opts.verboseLog: - auxLog = open(opts.verboseLog, "wb") + auxLog = open(opts.verboseLog, "w") else: auxLog = None @@ -405,6 +412,7 @@ def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True, return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics) + def generate_option_parser(): parser = OptionParser("usage: %prog [options] [dir A] [dir B]") parser.add_option("", "--rootA", dest="rootA", diff --git a/clang/utils/analyzer/SATestAdd.py b/clang/utils/analyzer/SATestAdd.py index 52089f4..e0e267b 100755 --- a/clang/utils/analyzer/SATestAdd.py +++ b/clang/utils/analyzer/SATestAdd.py @@ -52,8 +52,8 @@ import sys def isExistingProject(PMapFile, projectID): PMapReader = csv.reader(PMapFile) - for I in PMapReader: - if projectID == I[0]: + for ProjectInfo in PMapReader: + if projectID == ProjectInfo[0]: return True return False @@ -71,21 +71,24 @@ def addNewProject(ID, BuildMode): sys.exit(-1) # Build the project. + # TODO: Repair this call. We give it the wrong number of arguments and + # it is not trivial to construct argparse arguments in here. + # Requires refactoring of the 'testProject' function. SATestBuild.testProject(ID, BuildMode, IsReferenceBuild=True) # Add the project ID to the project map. 
ProjectMapPath = os.path.join(CurDir, SATestBuild.ProjectMapFile) if os.path.exists(ProjectMapPath): - FileMode = "r+b" + FileMode = "r+" else: print("Warning: Creating the Project Map file!!") - FileMode = "w+b" + FileMode = "w+" with open(ProjectMapPath, FileMode) as PMapFile: if (isExistingProject(PMapFile, ID)): - print('Warning: Project with ID \'', ID, \ - '\' already exists.', file=sys.stdout) + print('Warning: Project with ID \'', ID, + '\' already exists.', file=sys.stdout) print("Reference output has been regenerated.", file=sys.stdout) else: PMapWriter = csv.writer(PMapFile) @@ -97,12 +100,12 @@ def addNewProject(ID, BuildMode): # TODO: Set the path to the Repository directory. if __name__ == '__main__': if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'): - print('Add a new project for testing to the analyzer'\ - '\nUsage: ', sys.argv[0],\ - 'project_ID \n' \ - 'mode: 0 for single file project, ' \ - '1 for scan_build, ' \ - '2 for single file c++11 project', file=sys.stderr) + print('Add a new project for testing to the analyzer' + '\nUsage: ', sys.argv[0], + 'project_ID \n' + 'mode: 0 for single file project, ' + '1 for scan_build, ' + '2 for single file c++11 project', file=sys.stderr) sys.exit(-1) BuildMode = 1 diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py index 42b2658..7ca0544 100755 --- a/clang/utils/analyzer/SATestBuild.py +++ b/clang/utils/analyzer/SATestBuild.py @@ -75,7 +75,8 @@ logging.basicConfig( level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s') -class StreamToLogger(object): + +class StreamToLogger: def __init__(self, logger, log_level=logging.INFO): self.logger = logger self.log_level = log_level @@ -377,7 +378,7 @@ def runAnalyzePreprocessed(Args, Dir, SBOutputDir, Mode): # Build and call the analyzer command. 
OutputOption = "-o '%s.plist' " % os.path.join(PlistPath, FileName) Command = CmdPrefix + OutputOption + ("'%s'" % FileName) - LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b") + LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+") try: if Verbose == 1: Local.stdout.write(" Executing: %s\n" % (Command,)) @@ -432,7 +433,7 @@ def buildProject(Args, Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild): os.makedirs(os.path.join(SBOutputDir, LogFolderName)) # Build and analyze the project. - with open(BuildLogPath, "wb+") as PBuildLogFile: + with open(BuildLogPath, "w+") as PBuildLogFile: if (ProjectBuildMode == 1): downloadAndPatch(Dir, PBuildLogFile) runCleanupScript(Dir, PBuildLogFile) @@ -646,7 +647,7 @@ class TestProjectThread(threading.Thread): self.TasksQueue = TasksQueue self.ResultsDiffer = ResultsDiffer self.FailureFlag = FailureFlag - super(TestProjectThread, self).__init__() + super().__init__() # Needed to gracefully handle interrupts with Ctrl-C self.daemon = True @@ -700,7 +701,7 @@ def testProject(Args, ID, ProjectBuildMode, IsReferenceBuild=False, Strictness=0 def projectFileHandler(): - return open(getProjectMapPath(), "rb") + return open(getProjectMapPath(), "r") def iterateOverProjects(PMapFile): @@ -709,25 +710,26 @@ def iterateOverProjects(PMapFile): from the start. """ PMapFile.seek(0) - for I in csv.reader(PMapFile): - if (SATestUtils.isCommentCSVLine(I)): + for ProjectInfo in csv.reader(PMapFile): + if (SATestUtils.isCommentCSVLine(ProjectInfo)): continue - yield I + yield ProjectInfo def validateProjectFile(PMapFile): """ Validate project file. 
""" - for I in iterateOverProjects(PMapFile): - if len(I) != 2: + for ProjectInfo in iterateOverProjects(PMapFile): + if len(ProjectInfo) != 2: print("Error: Rows in the ProjectMapFile should have 2 entries.") raise Exception() - if I[1] not in ('0', '1', '2'): - print("Error: Second entry in the ProjectMapFile should be 0" \ + if ProjectInfo[1] not in ('0', '1', '2'): + print("Error: Second entry in the ProjectMapFile should be 0" " (single file), 1 (project), or 2(single file c++11).") raise Exception() + def singleThreadedTestAll(Args, ProjectsToTest): """ Run all projects. @@ -738,6 +740,7 @@ def singleThreadedTestAll(Args, ProjectsToTest): Success &= testProject(Args, *ProjArgs) return Success + def multiThreadedTestAll(Args, ProjectsToTest, Jobs): """ Run each project in a separate thread. diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py index ea3c08c..70f54d8 100755 --- a/clang/utils/analyzer/SATestUpdateDiffs.py +++ b/clang/utils/analyzer/SATestUpdateDiffs.py @@ -31,14 +31,14 @@ def updateReferenceResults(ProjName, ProjBuildMode): SATestBuild.getSBOutputDirName(IsReferenceBuild=False)) if not os.path.exists(CreatedResultsPath): - print("New results not found, was SATestBuild.py "\ - "previously run?", file=sys.stderr) + print("New results not found, was SATestBuild.py " + "previously run?", file=sys.stderr) sys.exit(1) BuildLogPath = SATestBuild.getBuildLogPath(RefResultsPath) Dirname = os.path.dirname(os.path.abspath(BuildLogPath)) runCmd("mkdir -p '%s'" % Dirname) - with open(BuildLogPath, "wb+") as PBuildLogFile: + with open(BuildLogPath, "w+") as PBuildLogFile: # Remove reference results: in git, and then again for a good measure # with rm, as git might not remove things fully if there are empty # directories involved. 
@@ -63,9 +63,10 @@ def updateReferenceResults(ProjName, ProjBuildMode): def main(argv): if len(argv) == 2 and argv[1] in ('-h', '--help'): - print("Update static analyzer reference results based "\ - "\non the previous run of SATestBuild.py.\n"\ - "\nN.B.: Assumes that SATestBuild.py was just run", file=sys.stderr) + print("Update static analyzer reference results based " + "\non the previous run of SATestBuild.py.\n" + "\nN.B.: Assumes that SATestBuild.py was just run", + file=sys.stderr) sys.exit(1) with SATestBuild.projectFileHandler() as f: diff --git a/clang/utils/analyzer/SATestUtils.py b/clang/utils/analyzer/SATestUtils.py index 0ed4a4b..079b06a 100644 --- a/clang/utils/analyzer/SATestUtils.py +++ b/clang/utils/analyzer/SATestUtils.py @@ -5,6 +5,7 @@ import sys Verbose = 1 + def which(command, paths=None): """which(command, [paths]) - Look up the given command in the paths string (or the PATH environment variable, if unspecified).""" diff --git a/clang/utils/analyzer/SumTimerInfo.py b/clang/utils/analyzer/SumTimerInfo.py index 36e519a..5d86f76 100644 --- a/clang/utils/analyzer/SumTimerInfo.py +++ b/clang/utils/analyzer/SumTimerInfo.py @@ -12,8 +12,8 @@ import sys if __name__ == '__main__': if len(sys.argv) < 2: - print('Usage: ', sys.argv[0],\ - 'scan_build_output_file', file=sys.stderr) + print('Usage: ', sys.argv[0], + 'scan_build_output_file', file=sys.stderr) sys.exit(-1) f = open(sys.argv[1], 'r') diff --git a/clang/utils/analyzer/exploded-graph-rewriter.py b/clang/utils/analyzer/exploded-graph-rewriter.py index f47be59..bae863c 100755 --- a/clang/utils/analyzer/exploded-graph-rewriter.py +++ b/clang/utils/analyzer/exploded-graph-rewriter.py @@ -34,7 +34,7 @@ def diff_dicts(curr, prev): # Represents any program state trait that is a dictionary of key-value pairs. 
-class GenericMap(object): +class GenericMap: def __init__(self, items): self.generic_map = collections.OrderedDict(items) @@ -47,9 +47,8 @@ class GenericMap(object): # A deserialized source location. -class SourceLocation(object): +class SourceLocation: def __init__(self, json_loc): - super(SourceLocation, self).__init__() logging.debug('json: %s' % json_loc) self.line = json_loc['line'] self.col = json_loc['column'] @@ -63,9 +62,8 @@ class SourceLocation(object): # A deserialized program point. -class ProgramPoint(object): +class ProgramPoint: def __init__(self, json_pp): - super(ProgramPoint, self).__init__() self.kind = json_pp['kind'] self.tag = json_pp['tag'] self.node_id = json_pp['node_id'] @@ -90,9 +88,8 @@ class ProgramPoint(object): # A single expression acting as a key in a deserialized Environment. -class EnvironmentBindingKey(object): +class EnvironmentBindingKey: def __init__(self, json_ek): - super(EnvironmentBindingKey, self).__init__() # CXXCtorInitializer is not a Stmt! self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \ else json_ek['init_id'] @@ -110,9 +107,8 @@ class EnvironmentBindingKey(object): # Deserialized description of a location context. -class LocationContext(object): +class LocationContext: def __init__(self, json_frame): - super(LocationContext, self).__init__() self.lctx_id = json_frame['lctx_id'] self.caption = json_frame['location_context'] self.decl = json_frame['calling'] @@ -131,9 +127,8 @@ class LocationContext(object): # A group of deserialized Environment bindings that correspond to a specific # location context. -class EnvironmentFrame(object): +class EnvironmentFrame: def __init__(self, json_frame): - super(EnvironmentFrame, self).__init__() self.location_context = LocationContext(json_frame) self.bindings = collections.OrderedDict( [(EnvironmentBindingKey(b), @@ -150,9 +145,8 @@ class EnvironmentFrame(object): # A deserialized Environment. 
This class can also hold other entities that # are similar to Environment, such as Objects Under Construction. -class GenericEnvironment(object): +class GenericEnvironment: def __init__(self, json_e): - super(GenericEnvironment, self).__init__() self.frames = [EnvironmentFrame(f) for f in json_e] def diff_frames(self, prev): @@ -181,9 +175,8 @@ class GenericEnvironment(object): # A single binding key in a deserialized RegionStore cluster. -class StoreBindingKey(object): +class StoreBindingKey: def __init__(self, json_sk): - super(StoreBindingKey, self).__init__() self.kind = json_sk['kind'] self.offset = json_sk['offset'] @@ -198,9 +191,8 @@ class StoreBindingKey(object): # A single cluster of the deserialized RegionStore. -class StoreCluster(object): +class StoreCluster: def __init__(self, json_sc): - super(StoreCluster, self).__init__() self.base_region = json_sc['cluster'] self.bindings = collections.OrderedDict( [(StoreBindingKey(b), b['value']) for b in json_sc['items']]) @@ -214,9 +206,8 @@ class StoreCluster(object): # A deserialized RegionStore. -class Store(object): +class Store: def __init__(self, json_s): - super(Store, self).__init__() self.ptr = json_s['pointer'] self.clusters = collections.OrderedDict( [(c['pointer'], StoreCluster(c)) for c in json_s['items']]) @@ -235,9 +226,8 @@ class Store(object): # Deserialized messages from a single checker in a single program state. # Basically a list of raw strings. -class CheckerLines(object): +class CheckerLines: def __init__(self, json_lines): - super(CheckerLines, self).__init__() self.lines = json_lines def diff_lines(self, prev): @@ -250,9 +240,8 @@ class CheckerLines(object): # Deserialized messages of all checkers, separated by checker. 
-class CheckerMessages(object): +class CheckerMessages: def __init__(self, json_m): - super(CheckerMessages, self).__init__() self.items = collections.OrderedDict( [(m['checker'], CheckerLines(m['messages'])) for m in json_m]) @@ -269,9 +258,8 @@ class CheckerMessages(object): # A deserialized program state. -class ProgramState(object): +class ProgramState: def __init__(self, state_id, json_ps): - super(ProgramState, self).__init__() logging.debug('Adding ProgramState ' + str(state_id)) if json_ps is None: @@ -315,9 +303,8 @@ class ProgramState(object): # A deserialized exploded graph node. Has a default constructor because it # may be referenced as part of an edge before its contents are deserialized, # and in this moment we already need a room for predecessors and successors. -class ExplodedNode(object): +class ExplodedNode: def __init__(self): - super(ExplodedNode, self).__init__() self.predecessors = [] self.successors = [] @@ -338,7 +325,7 @@ class ExplodedNode(object): # A deserialized ExplodedGraph. Constructed by consuming a .dot file # line-by-line. -class ExplodedGraph(object): +class ExplodedGraph: # Parse .dot files with regular expressions. node_re = re.compile( '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$') @@ -346,7 +333,6 @@ class ExplodedGraph(object): '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$') def __init__(self): - super(ExplodedGraph, self).__init__() self.nodes = collections.defaultdict(ExplodedNode) self.root_id = None self.incomplete_line = '' @@ -407,10 +393,9 @@ class ExplodedGraph(object): # A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based # syntax highlighing. 
-class DotDumpVisitor(object): +class DotDumpVisitor: def __init__(self, do_diffs, dark_mode, gray_mode, topo_mode, dump_dot_only): - super(DotDumpVisitor, self).__init__() self._do_diffs = do_diffs self._dark_mode = dark_mode self._gray_mode = gray_mode @@ -896,10 +881,7 @@ class DotDumpVisitor(object): # BasicExplorer explores the whole graph in no particular order. -class BasicExplorer(object): - def __init__(self): - super(BasicExplorer, self).__init__() - +class BasicExplorer: def explore(self, graph, visitor): visitor.visit_begin_graph(graph) for node in sorted(graph.nodes): @@ -919,10 +901,7 @@ class BasicExplorer(object): # SinglePathTrimmer keeps only a single path - the leftmost path from the root. # Useful when the trimmed graph is still too large. -class SinglePathTrimmer(object): - def __init__(self): - super(SinglePathTrimmer, self).__init__() - +class SinglePathTrimmer: def trim(self, graph): visited_nodes = set() node_id = graph.root_id @@ -946,9 +925,8 @@ class SinglePathTrimmer(object): # TargetedTrimmer keeps paths that lead to specific nodes and discards all # other paths. Useful when you cannot use -trim-egraph (e.g. when debugging # a crash). -class TargetedTrimmer(object): +class TargetedTrimmer: def __init__(self, target_nodes): - super(TargetedTrimmer, self).__init__() self._target_nodes = target_nodes @staticmethod -- 2.7.4