2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 import argparse, os, sys, json, subprocess, pickle, StringIO
# Command-line interface. NOTE(review): the `parser.add_argument(` header
# lines are not visible in this chunk (the embedded original line numbers
# jump), so each option below shows only its flags/help arguments.
8 parser = argparse.ArgumentParser(
10 "Process the Blink points-to graph generated by the Blink GC plugin.")
13 '-', dest='use_stdin', action='store_true',
14 help='Read JSON graph files from stdin')
17 '-v', '--verbose', action='store_true',
18 help='Verbose output')
# -c defaults to True, so cycle detection runs unless explicitly disabled.
21 '-c', '--detect-cycles', action='store_true', default=True,
22 help='Detect cycles containing GC roots')
25 '-i', '--ignored-cycles', default=None,
26 help='File with cycles to ignore')
29 '-p', '--pickle-graph', default=None,
30 help='File to read/save the graph from/to')
# Positional arguments: JSON graph files or directories to scan.
33 'files', metavar='FILE', nargs='*', default=[],
34 help='JSON graph files or directories containing them')
# Module-level mutable state shared by the functions below. The
# initializers for args/graph/roots/ignored_cycles are not visible in this
# chunk; only the error flag's initializer survived extraction.
36 # Command line args after parsing.
39 # Map from node labels to nodes.
45 # List of cycles to ignore.
48 # Global flag to determine exit code.
49 global_reported_error = False
# Setter/getter pair for the module-level "reported an error" flag that
# ultimately decides the process exit code.
51 def set_reported_error(value):
52 global global_reported_error
53 global_reported_error = value
# NOTE(review): the `def` header for the getter (original line ~55) is
# missing from this chunk; line 56 is its body.
56 return global_reported_error
# Fragments of two helpers whose `def` headers are missing from this chunk:
# line 64 declares the super-edge copy counter global (presumably inside a
# reset/init helper), and line 68 is get_node's body: fetch-or-create a
# Node keyed by its label name.
64 global global_inc_copy
68 return graph.setdefault(name, Node(name))
70 # Representation of graph nodes. Basically a map of directed edges.
# NOTE(review): the `class Node:` header and several body lines are
# missing from this chunk (the embedded line numbers jump).
72 def __init__(self, name):
# Line 77 is the body of __repr__/__str__ (its def header is missing);
# it shows a Node carries name, visited flag and an edges map.
77 return "%s(%s) %s" % (self.name, self.visited, self.edges)
78 def update_node(self, decl):
79 # Currently we don't track any node info besides its edges.
81 def update_edge(self, e):
# `new_edge` is presumably built from `e` on the missing line 82 — verify.
83 edge = self.edges.get(new_edge.key)
85 # If an edge exists, its kind is the strongest of the two.
86 edge.kind = max(edge.kind, new_edge.kind)
88 self.edges[new_edge.key] = new_edge
89 def super_edges(self):
# Python 2 idiom: dict.itervalues() (removed in Python 3).
90 return [ e for e in self.edges.itervalues() if e.is_super() ]
# Line 92 resets the shortest-path cost to "infinity" (Python 2
# sys.maxint); part of a reset method whose header is missing.
92 self.cost = sys.maxint
96 # Representation of directed graph edges.
# NOTE(review): the `class Edge:` header (original line ~97) is missing
# from this chunk, as are several method def headers below.
98 def __init__(self, **decl):
99 self.src = decl['src']
100 self.dst = decl['dst']
101 self.lbl = decl['lbl']
102 self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
103 self.loc = decl['loc']
104 # The label does not uniquely determine an edge from a node. We
105 # define the semi-unique key to be the concatenation of the
106 # label and dst name. This is sufficient to track the strongest
107 # edge to a particular type. For example, if the field A::m_f
108 # has type HashMap<WeakMember<B>, Member<B>> we will have a
109 # strong edge with key m_f#B from A to B.
110 self.key = '%s#%s' % (self.lbl, self.dst)
# Line 119 is the body of __repr__/__str__ (def header missing).
119 return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
# Predicate bodies; per the kind encoding above, 2 = root and 0 = weak.
# The def headers for is_root/is_weak are missing from this chunk.
121 return self.kind == 2
123 return self.kind == 0
124 def keeps_alive(self):
126 def is_subclass(self):
127 return self.lbl.startswith('<subclass>')
# Line 129 belongs to is_super() (its def header at line 128 is missing).
129 return self.lbl.startswith('<super>')
131 def parse_file(filename):
# Loads the whole JSON graph file; the return statement (line ~133) is
# missing from this chunk. Python 2 style: the file handle from open()
# is never explicitly closed.
132 obj = json.load(open(filename))
135 def build_graphs_in_dir(dirname):
136 # TODO: Use platform independent code, e.g., os.walk
137 files = subprocess.check_output(
138 ['find', dirname, '-name', '*.graph.json']).split('\n')
# NOTE(review): split('\n') leaves a trailing empty entry, so this count
# reads one higher than the real file count; the loop that builds each
# graph (lines 140-145) is missing from this chunk.
139 log("Found %d files" % len(files))
146 def build_graph(filename):
# Folds one JSON graph file into the global `graph` map: node
# declarations update nodes, everything else updates edges.
147 for decl in parse_file(filename):
# Python 2 idiom: dict.has_key (removed in Python 3; use `in`).
148 if decl.has_key('name'):
149 # Add/update a node entry
# `name` is presumably extracted from decl on the missing line 150 — verify.
151 node = get_node(name)
152 node.update_node(decl)
# The elif/else header for the edge branch (line ~153) and the `name`
# assignment (line ~155) are missing from this chunk.
154 # Add/update an edge entry
156 node = get_node(name)
157 node.update_edge(decl)
159 # Copy all non-weak edges from super classes to their subclasses.
160 # This causes all fields of a super to be considered fields of a
161 # derived class without transitively relating derived classes with
162 # each other. For example, if B <: A, C <: A, and for some D, D => B,
163 # we don't want that to entail that D => C.
164 def copy_super_edges(edge):
# Guard: only strong <super> edges are processed; the early-return body
# (lines 166-167) is missing from this chunk.
165 if edge.is_weak() or not edge.is_super():
168 # Make the super-class edge weak (prohibits processing twice).
# The statement that actually weakens the edge (line 169) is missing.
170 # If the super class is not in our graph exit early.
171 super_node = graph.get(edge.dst)
172 if super_node is None: return
173 # Recursively copy all super-class edges.
174 for e in super_node.super_edges():
# The recursive copy_super_edges call (line 175) is missing.
176 # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
177 sub_node = graph[edge.src]
178 for e in super_node.edges.itervalues():
179 if e.keeps_alive() and not e.is_subclass():
# `new_edge` is presumably a copy of `e` made on the missing line 180.
181 new_edge.src = sub_node.name
# Relabel so the report shows which super class the field came from.
182 new_edge.lbl = '%s <: %s' % (super_node.name, e.lbl)
183 sub_node.edges[new_edge.key] = new_edge
184 # Add a strong sub-class edge.
185 sub_edge = edge.copy()
# The kind assignment for sub_edge (line 186) is missing from this chunk.
187 sub_edge.lbl = '<subclass>'
188 sub_edge.src = super_node.name
189 sub_edge.dst = sub_node.name
190 super_node.edges[sub_edge.key] = sub_edge
192 def complete_graph():
# Finalizes graph construction: pushes super-class edges down into
# subclasses, then visits every edge (the per-edge body on the missing
# lines 197-198 presumably collects root edges — verify).
193 for node in graph.itervalues():
194 for edge in node.super_edges():
195 copy_super_edges(edge)
196 for edge in node.edges.itervalues():
199 log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
# Fragment of a reset helper (its def header at line ~201 is missing):
# iterates all nodes, presumably clearing visited/cost before a new
# shortest-path search — verify against the full file.
202 for n in graph.itervalues():
205 def shortest_path(start, end):
# Dijkstra-style search over keeps-alive edges with unit edge weights.
# The initialization of `minlist` (lines 206-207) is missing.
208 while len(minlist) > 0:
# Sorted descending by cost so pop() removes the cheapest node.
209 minlist.sort(key=lambda n: -n.cost)
210 current = minlist.pop()
211 current.visited = True
# Stop once the target is reached or no shorter path can exist; the
# break/return body (line 213) is missing from this chunk.
212 if current == end or current.cost >= end.cost + 1:
214 for e in current.edges.itervalues():
# Only keeps-alive edges matter for leak paths; the continue body
# (line 216) is missing.
215 if not e.keeps_alive():
217 dst = graph.get(e.dst)
218 if dst is None or dst.visited:
# Relax the edge; post-relaxation bookkeeping (line 222+) is missing.
220 if current.cost < dst.cost:
221 dst.cost = current.cost + 1
# Fragment of the cycle-detection driver (its def header at line ~225 is
# missing from this chunk).
226 for root_edge in roots:
228 src = graph[root_edge.src]
229 dst = graph.get(root_edge.dst)
# WTF::String is special-cased; its handling (lines 231-232) is missing.
230 if root_edge.dst == "WTF::String":
# A persistent root pointing at a type with no node in the graph is
# reported as an error on its own.
233 print "\nPersistent root to incomplete destination object:"
235 set_reported_error(True)
237 # Find the shortest path from the root target (dst) to its host (src)
238 shortest_path(dst, src)
# A finite cost means dst can reach src, i.e. a cycle through the root.
239 if src.cost < sys.maxint:
240 report_cycle(root_edge)
242 def is_ignored_cycle(cycle):
# True when the cycle matches any block from the ignore file; the return
# statements (lines 245-246) are missing from this chunk.
243 for block in ignored_cycles:
244 if block_match(cycle, block):
247 def block_match(b1, b2):
# Different line counts can never match (the return on the missing
# line 249); the per-line comparison body (lines 251-254) is also
# missing from this chunk.
248 if len(b1) != len(b2):
250 for (l1, l2) in zip(b1, b2):
255 def report_cycle(root_edge):
256 dst = graph[root_edge.dst]
# Path reconstruction appears to walk `path` back-pointers from dst back
# to the root's source; the loop header and setup (lines 257-261) are
# missing from this chunk.
262 edge = graph[edge.src].path
263 path.append(root_edge)
265 # Find the max loc length for pretty printing.
268 if len(p.loc) > max_loc:
# Python 2: StringIO module and `print >>stream` redirection syntax.
270 out = StringIO.StringIO()
# Each line: source location padded to max_loc, then the edge itself.
272 print >>out, (p.loc + ':').ljust(max_loc + 1), p
273 sout = out.getvalue()
# Only cycles not covered by the ignore file are reported as errors.
274 if not is_ignored_cycle(sout):
275 print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
276 set_reported_error(True)
# Fragments of the pickle load/save helpers (their def headers are
# missing from this chunk). The pickled payload is the (graph, roots)
# pair. NOTE(review): pickle.load on an untrusted file can execute
# arbitrary code — safe only for locally produced graph dumps.
281 log("Reading graph from pickled file: " + args.pickle_graph)
282 dump = pickle.load(open(args.pickle_graph, 'rb'))
287 log("Saving graph to pickle file: " + args.pickle_graph)
288 dump = (graph, roots)
289 pickle.dump(dump, open(args.pickle_graph, 'wb'))
291 def read_ignored_cycles():
# Populates the global ignore list from the file given by
# --ignored-cycles; a no-op when the flag was not passed (the early
# return on the missing line 294).
292 global ignored_cycles
293 if not args.ignored_cycles:
295 log("Reading ignored cycles from file: " + args.ignored_cycles)
# Lines are grouped into blocks and appended; the block-building logic
# (most of lines 296-305) is missing from this chunk. The second append
# presumably flushes the final block after the loop — verify.
297 for l in open(args.ignored_cycles):
301 ignored_cycles.append(block)
306 ignored_cycles.append(block)
# Fragment of the script driver (its def header, ~line 309, is missing
# from this chunk), followed by the entry guard whose call line is also
# missing.
310 args = parser.parse_args()
311 if not args.detect_cycles:
312 print "Please select an operation to perform (eg, -c to detect cycles)"
# Reuse a previously pickled graph when one exists on disk.
315 if args.pickle_graph and os.path.isfile(args.pickle_graph):
# stdin mode: one graph filename per input line.
319 log("Reading files from stdin")
321 build_graph(f.strip())
323 log("Reading files and directories from command line")
324 if len(args.files) == 0:
# NOTE(review): "directores" typo below is runtime output text, left
# unchanged in this comment-only pass.
325 print "Please provide files or directores for building the graph"
# Directories are scanned recursively; plain files are loaded directly.
330 log("Building graph from files in directory: " + f)
331 build_graphs_in_dir(f)
333 log("Building graph from file: " + f)
335 log("Completing graph construction (%d graph nodes)" % len(graph))
337 if args.pickle_graph:
339 if args.detect_cycles:
340 read_ignored_cycles()
# NOTE(review): "containg" typo below is runtime log text, left unchanged.
341 log("Detecting cycles containg GC roots")
347 if __name__ == '__main__':