# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json

from telemetry.core.heap import live_heap_object
from telemetry.core.heap import retaining_edge
class ChromeJsHeapSnapshotParser(object):
  """Parser for the heap snapshot.

  The heap snapshot JSON format is defined by HeapSnapshotJSONSerializer in V8.

  The snapshot contains a list of integers describing nodes (types, names, etc.)
  and a list of integers describing edges (types, the node the edge points to,
  etc.) and a string table. All strings are expressed as indices to the string
  table.

  In addition, the snapshot contains meta information describing the data fields
  for nodes and the data fields for edges.

  Attributes:
    _node_dict: {int -> LiveHeapObject}, maps integer ids to LiveHeapObject
        objects.
    _node_list: [int], the raw node data of the heap snapshot.
    _edge_list: [int], the raw edge data of the heap snapshot.
    _strings: [str], the string table of the heap snapshot.
    _node_types: [str], the possible node types in the heap snapshot.
    _edge_types: [str], the possible edge types in the heap snapshot.
    _node_type_ix: int, index of the node type field.
    _node_name_ix: int, index of the node name field.
    _node_id_ix: int, index of the node id field.
    _node_edge_count_ix: int, index of the node edge count field.
    _node_field_count: int, number of node fields.
    _edge_type_ix: int, index of the edge type field.
    _edge_name_or_ix_ix: int, index of the "edge name or index" field.
    _edge_to_node_ix: int, index of the "to node for an edge" field.
    _edge_field_count: int, number of edge fields.
  """

  # Keys and field names that __init__ reads; CanImport checks for all of them.
  _REQUIRED_HEAP_KEYS = ('nodes', 'edges', 'strings', 'snapshot')
  _REQUIRED_META_KEYS = ('node_types', 'edge_types',
                         'node_fields', 'edge_fields')
  _REQUIRED_NODE_FIELDS = ('type', 'name', 'id', 'edge_count')
  _REQUIRED_EDGE_FIELDS = ('type', 'name_or_index', 'to_node')

  # Node / edge types that are filtered out while parsing.
  _UNINTERESTING_NODE_TYPES = ('hidden', 'code', 'number', 'native',
                               'synthetic')
  _UNINTERESTING_EDGE_TYPES = ('weak', 'hidden', 'internal')

  def __init__(self, raw_data):
    """Loads a JSON heap snapshot and parses it into LiveHeapObjects.

    Args:
      raw_data: str, the heap snapshot serialized as JSON.

    Raises:
      ValueError: raw_data is not valid JSON, or a required name is absent
          from the node / edge field lists.
      KeyError: a required snapshot component or metadata entry is missing.
      Exception: the node or edge list of the snapshot is malformed.
    """
    heap = json.loads(raw_data)
    self._node_dict = {}

    # Read the snapshot components (nodes, edges, strings, metadata).
    self._node_list = heap['nodes']
    self._edge_list = heap['edges']
    self._strings = heap['strings']

    meta = heap['snapshot']['meta']
    # The first entry of node_types / edge_types is the list of type names.
    self._node_types = meta['node_types'][0]
    self._edge_types = meta['edge_types'][0]
    node_fields = meta['node_fields']
    edge_fields = meta['edge_fields']

    # Find the indices of the required node and edge fields based on the
    # metadata.
    self._node_type_ix = node_fields.index('type')
    self._node_name_ix = node_fields.index('name')
    self._node_id_ix = node_fields.index('id')
    self._node_edge_count_ix = node_fields.index('edge_count')
    self._node_field_count = len(node_fields)

    self._edge_type_ix = edge_fields.index('type')
    self._edge_name_or_ix_ix = edge_fields.index('name_or_index')
    self._edge_to_node_ix = edge_fields.index('to_node')
    self._edge_field_count = len(edge_fields)

    self._ParseSnapshot()

  @staticmethod
  def _HasAll(container, names):
    """Returns True if every entry of names is contained in container."""
    return all(name in container for name in names)

  @staticmethod
  def CanImport(raw_data):
    """Checks whether raw_data looks like a heap snapshot this class can parse.

    Args:
      raw_data: str, the candidate heap snapshot serialized as JSON.

    Returns:
      bool, True if raw_data is a JSON object that contains every snapshot
      component, metadata entry and field name that __init__ reads. False
      otherwise, including when raw_data is not valid JSON at all.
    """
    parser = ChromeJsHeapSnapshotParser
    try:
      heap = json.loads(raw_data)
    except (TypeError, ValueError):
      # Not JSON (or not even a string): definitely not a heap snapshot.
      return False

    if not isinstance(heap, dict):
      return False
    if not parser._HasAll(heap, parser._REQUIRED_HEAP_KEYS):
      return False

    snapshot = heap['snapshot']
    if not isinstance(snapshot, dict):
      return False
    meta = snapshot.get('meta')
    if not isinstance(meta, dict):
      return False
    if not parser._HasAll(meta, parser._REQUIRED_META_KEYS):
      return False

    node_fields = meta['node_fields']
    edge_fields = meta['edge_fields']
    # __init__ calls list.index() on both, so anything but a list is rejected.
    if not isinstance(node_fields, list) or not isinstance(edge_fields, list):
      return False
    if not parser._HasAll(node_fields, parser._REQUIRED_NODE_FIELDS):
      return False
    if not parser._HasAll(edge_fields, parser._REQUIRED_EDGE_FIELDS):
      return False
    return True

  def GetAllLiveHeapObjects(self):
    """Returns the LiveHeapObjects built from the snapshot."""
    return self._node_dict.values()

  @staticmethod
  def LiveHeapObjectToJavaScript(heap_object):
    """Returns the object's name, or str(heap_object) if the name is empty."""
    return heap_object.name or str(heap_object)

  @staticmethod
  def RetainingEdgeToJavaScript(edge):
    """Formats an edge as JavaScript accessor syntax.

    Args:
      edge: an edge exposing type_string and name_string.

    Returns:
      str, '.name' for property edges and '[name]' for element edges.
    """
    if edge.type_string == 'property':
      return '.' + edge.name_string
    if edge.type_string == 'element':
      return '[' + edge.name_string + ']'
    # NOTE(review): fallback for other edge types is assumed to mirror
    # LiveHeapObjectToJavaScript -- confirm.
    return str(edge)

  def _ParseSnapshot(self):
    """Parses the stored JSON snapshot data.

    Fills in self._node_dict with LiveHeapObject objects constructed based on
    the heap snapshot. The LiveHeapObject objects contain the associated
    RetainingEdge objects.
    """
    # The edges of all nodes are stored back to back in the edge list, so the
    # start index for each node is where the previous node's edges ended.
    edge_start_ix = 0
    # NOTE: xrange keeps this file Python 2, as the rest of the module is.
    for ix in xrange(0, len(self._node_list), self._node_field_count):
      edge_start_ix = self._ReadNodeFromIndex(ix, edge_start_ix)

    # Add pointers to the endpoints to the edges, and associate the edges with
    # the "to" nodes.
    for from_object in self._node_dict.values():
      for e in from_object.edges_from:
        to_object = self._node_dict[e.to_object_id]
        to_object.AddEdgeTo(e)
        # NOTE(review): SetFromObject is assumed to be the counterpart of
        # SetToObject on RetainingEdge -- confirm against retaining_edge.
        e.SetFromObject(from_object)
        e.SetToObject(to_object)

  def _ReadNodeFromIndex(self, ix, edges_start):
    """Reads the data for a node from the heap snapshot.

    If the index contains an interesting node, constructs a LiveHeapObject and
    adds it to self._node_dict.

    Args:
      ix: int, index into the self._node_list array.
      edges_start: int, the index of the edge array where the edges for the node
          begin.

    Returns:
      int, the edge start index for the next node.

    Raises:
      Exception: The node list of the snapshot is malformed.
    """
    if ix + self._node_field_count > len(self._node_list):
      raise Exception('Snapshot node list too short')

    type_ix = self._node_list[ix + self._node_type_ix]
    type_string = self._node_types[int(type_ix)]

    # edges_end is noninclusive (the index of the first edge that is not part of
    # the node).
    edge_count = self._node_list[ix + self._node_edge_count_ix]
    edges_end = edges_start + edge_count * self._edge_field_count

    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(type_string):
      # Skipped nodes still own edges in the edge list; step over them.
      return edges_end

    name_ix = self._node_list[ix + self._node_name_ix]
    node_id = self._node_list[ix + self._node_id_ix]

    # Objects are labelled with their name from the string table; every other
    # node type is labelled with its type in parentheses.
    if type_string == 'object':
      ctor_name = self._strings[int(name_ix)]
    else:
      ctor_name = '(%s)' % type_string

    n = live_heap_object.LiveHeapObject(node_id, type_string, ctor_name)
    if type_string == 'string':
      n.string = self._strings[int(name_ix)]

    for edge_ix in xrange(edges_start, edges_end, self._edge_field_count):
      edge = self._ReadEdgeFromIndex(node_id, edge_ix)
      if edge:
        # The edge will be associated with the other endpoint when all the data
        # has been read.
        # NOTE(review): AddEdgeFrom is assumed to be the method that fills
        # edges_from -- confirm against live_heap_object.
        n.AddEdgeFrom(edge)

    self._node_dict[node_id] = n
    return edges_end

  @staticmethod
  def _IsNodeTypeUninteresting(type_string):
    """Helper function for filtering out nodes from the heap snapshot.

    Args:
      type_string: str, type of the node.

    Returns:
      bool, True if the node is of an uninteresting type and shouldn't be
      included in the heap snapshot analysis.
    """
    return type_string in ChromeJsHeapSnapshotParser._UNINTERESTING_NODE_TYPES

  @staticmethod
  def _IsEdgeTypeUninteresting(edge_type_string):
    """Helper function for filtering out edges from the heap snapshot.

    Args:
      edge_type_string: str, type of the edge.

    Returns:
      bool, True if the edge is of an uninteresting type and shouldn't be
      included in the heap snapshot analysis.
    """
    return (edge_type_string in
            ChromeJsHeapSnapshotParser._UNINTERESTING_EDGE_TYPES)

  def _ReadEdgeFromIndex(self, node_id, edge_ix):
    """Reads the data for an edge from the heap snapshot.

    Args:
      node_id: int, id of the node which is the starting point of the edge.
      edge_ix: int, index into the self._edge_list array.

    Returns:
      RetainingEdge, if the index contains an interesting edge that points to
      an interesting node, otherwise None.

    Raises:
      Exception: The edge list of the snapshot is malformed.
    """
    if edge_ix + self._edge_field_count > len(self._edge_list):
      raise Exception('Snapshot edge list too short')

    edge_type_ix = self._edge_list[edge_ix + self._edge_type_ix]
    edge_type_string = self._edge_types[int(edge_type_ix)]

    if ChromeJsHeapSnapshotParser._IsEdgeTypeUninteresting(edge_type_string):
      return None

    child_name_or_ix = self._edge_list[edge_ix + self._edge_name_or_ix_ix]
    child_node_ix = self._edge_list[edge_ix + self._edge_to_node_ix]

    # The child_node_ix is an index into the node list. Read the actual
    # node type and id from there.
    child_node_type_ix = self._node_list[child_node_ix + self._node_type_ix]
    child_node_type_string = self._node_types[int(child_node_type_ix)]
    child_node_id = self._node_list[child_node_ix + self._node_id_ix]

    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(
        child_node_type_string):
      return None

    # For element nodes, the child has no name (only an index). The raw number
    # is also used when it does not fit into the string table.
    if (edge_type_string == 'element' or
        int(child_name_or_ix) >= len(self._strings)):
      child_name_string = str(child_name_or_ix)
    else:
      child_name_string = self._strings[int(child_name_or_ix)]
    return retaining_edge.RetainingEdge(node_id, child_node_id,
                                        edge_type_string, child_name_string)