# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json

from telemetry.core.heap import live_heap_object
from telemetry.core.heap import retaining_edge
class ChromeJsHeapSnapshotParser(object):
  """Parser for the heap snapshot.

  The heap snapshot JSON format is defined by HeapSnapshotJSONSerializer in V8.

  The snapshot contains a list of integers describing nodes (types, names, etc.)
  and a list of integers describing edges (types, the node the edge points to,
  etc.) and a string table. All strings are expressed as indices to the string
  table.

  In addition, the snapshot contains meta information describing the data fields
  for nodes and the data fields for edges.

  Attributes:
    _node_dict: {int -> LiveHeapObject}, maps integer ids to LiveHeapObject
        objects.
    _node_list: [int], the raw node data of the heap snapshot.
    _edge_list: [int], the raw edge data of the heap snapshot.
    _strings: [str], the string table of the heap snapshot.
    _node_types: [str], the possible node types in the heap snapshot.
    _edge_types: [str], the possible edge types in the heap snapshot.
    _node_type_ix: int, index of the node type field.
    _node_name_ix: int, index of the node name field.
    _node_id_ix: int, index of the node id field.
    _node_edge_count_ix: int, index of the node edge count field.
    _node_field_count: int, number of node fields.
    _edge_type_ix: int, index of the edge type field.
    _edge_name_or_ix_ix: int, index of the "edge name or index" field.
    _edge_to_node_ix: int, index of the "to node for an edge" field.
    _edge_field_count: int, number of edge fields.
  """

  def __init__(self, raw_data):
    """Loads the snapshot from its JSON text and parses it immediately.

    Args:
      raw_data: str, the JSON text of the heap snapshot.

    Raises:
      ValueError: raw_data is not valid JSON, or a required field name is
          missing from the snapshot meta information.
      KeyError: A required top-level or meta key is missing.
      Exception: The node or edge list of the snapshot is malformed.
    """
    heap = json.loads(raw_data)
    # Must exist before _ParseSnapshot runs; it is filled in node by node.
    self._node_dict = {}

    # Read the snapshot components (nodes, edges, strings, metadata).
    self._node_list = heap['nodes']
    self._edge_list = heap['edges']
    self._strings = heap['strings']

    meta = heap['snapshot']['meta']
    # The first entry of node_types / edge_types is the list of type names
    # that the integer type field of a node / edge indexes into.
    self._node_types = meta['node_types'][0]
    self._edge_types = meta['edge_types'][0]
    node_fields = meta['node_fields']
    edge_fields = meta['edge_fields']

    # Find the indices of the required node and edge fields based on the
    # metadata.
    self._node_type_ix = node_fields.index('type')
    self._node_name_ix = node_fields.index('name')
    self._node_id_ix = node_fields.index('id')
    self._node_edge_count_ix = node_fields.index('edge_count')
    self._node_field_count = len(node_fields)

    self._edge_type_ix = edge_fields.index('type')
    self._edge_name_or_ix_ix = edge_fields.index('name_or_index')
    self._edge_to_node_ix = edge_fields.index('to_node')
    self._edge_field_count = len(edge_fields)

    self._ParseSnapshot()

  @staticmethod
  def CanImport(raw_data):
    """Checks whether raw_data looks like a heap snapshot this class can parse.

    Only the presence of the components and field names that __init__ reads is
    verified; the node and edge data themselves are not inspected.

    Args:
      raw_data: str, the candidate JSON text.

    Returns:
      bool, True if every required component and field name is present. False
      otherwise, including when raw_data is not valid JSON at all.
    """
    try:
      heap = json.loads(raw_data)
    except (TypeError, ValueError):
      # Not JSON (or not even text): certainly not an importable snapshot.
      return False
    if not isinstance(heap, dict):
      return False

    for key in ('nodes', 'edges', 'strings', 'snapshot'):
      if key not in heap:
        return False
    snapshot = heap['snapshot']
    if not isinstance(snapshot, dict) or 'meta' not in snapshot:
      return False

    meta = snapshot['meta']
    if not isinstance(meta, dict):
      return False
    for key in ('node_types', 'edge_types', 'node_fields', 'edge_fields'):
      if key not in meta:
        return False

    node_fields = meta['node_fields']
    edge_fields = meta['edge_fields']
    for field in ('type', 'name', 'id', 'edge_count'):
      if field not in node_fields:
        return False
    for field in ('type', 'name_or_index', 'to_node'):
      if field not in edge_fields:
        return False
    return True

  def GetAllLiveHeapObjects(self):
    """Returns the LiveHeapObject objects built from the snapshot."""
    return self._node_dict.values()

  @staticmethod
  def LiveHeapObjectToJavaScript(heap_object):
    """Returns the object's name, or its str() form when the name is empty."""
    return heap_object.name or str(heap_object)

  @staticmethod
  def RetainingEdgeToJavaScript(edge):
    """Renders an edge as the JavaScript accessor that follows it.

    Args:
      edge: an object with type_string and name_string attributes.

    Returns:
      str, '.name' for property edges, '[name]' for element edges and
      str(edge) for every other edge type.
    """
    if edge.type_string == 'property':
      return '.' + edge.name_string
    if edge.type_string == 'element':
      return '[' + edge.name_string + ']'
    # Same fallback as LiveHeapObjectToJavaScript: always hand back a string.
    return str(edge)

  def _ParseSnapshot(self):
    """Parses the stored JSON snapshot data.

    Fills in self._node_dict with LiveHeapObject objects constructed based on
    the heap snapshot. The LiveHeapObject objects contain the associated
    RetainingEdge objects.

    Raises:
      Exception: The node or edge list of the snapshot is malformed.
    """
    # The edges of all nodes are stored back to back in the edge list, so each
    # node's edges start where the previous node's edges ended.
    edge_start_ix = 0
    # range (not xrange) so that this runs on both Python 2 and Python 3.
    for ix in range(0, len(self._node_list), self._node_field_count):
      edge_start_ix = self._ReadNodeFromIndex(ix, edge_start_ix)

    # Add pointers to the endpoints to the edges, and associate the edges with
    # the nodes. This needs a second pass because an edge may point to a node
    # that had not been read yet when the edge itself was read.
    for from_object in self._node_dict.values():
      for e in from_object.edges_from:
        to_object = self._node_dict[e.to_object_id]
        to_object.AddEdgeTo(e)
        # NOTE(review): SetFromObject is assumed to be the counterpart of
        # SetToObject on RetainingEdge -- confirm against retaining_edge.py.
        e.SetFromObject(from_object)
        e.SetToObject(to_object)

  def _ReadNodeFromIndex(self, ix, edges_start):
    """Reads the data for a node from the heap snapshot.

    If the index contains an interesting node, constructs a Node object and adds
    it to self._node_dict.

    Args:
      ix: int, index into the self._node_list array.
      edges_start: int, the index of the edge array where the edges for the node
          start.

    Returns:
      int, the edge start index for the next node.

    Raises:
      Exception: The node list of the snapshot is malformed.
    """
    if ix + self._node_field_count > len(self._node_list):
      raise Exception('Snapshot node list too short')

    type_ix = self._node_list[ix + self._node_type_ix]
    type_string = self._node_types[int(type_ix)]

    # edges_end is noninclusive (the index of the first edge that is not part of
    # this node). It is computed before the filtering below because the edges
    # of a skipped node still occupy their slots in the edge list.
    edge_count = self._node_list[ix + self._node_edge_count_ix]
    edges_end = edges_start + edge_count * self._edge_field_count

    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(type_string):
      return edges_end

    name_ix = self._node_list[ix + self._node_name_ix]
    node_id = self._node_list[ix + self._node_id_ix]

    # Only 'object' nodes take their constructor name from the string table;
    # every other type is labelled with its parenthesized type name.
    if type_string == 'object':
      ctor_name = self._strings[int(name_ix)]
    else:
      ctor_name = '(%s)' % type_string

    n = live_heap_object.LiveHeapObject(node_id, type_string, ctor_name)
    if type_string == 'string':
      n.string = self._strings[int(name_ix)]

    for edge_ix in range(edges_start, edges_end, self._edge_field_count):
      edge = self._ReadEdgeFromIndex(node_id, edge_ix)
      if edge:
        # The edge will be associated with the other endpoint when all the data
        # has been read.
        # NOTE(review): AddEdgeFrom is assumed to be the counterpart of
        # AddEdgeTo that feeds edges_from -- confirm against
        # live_heap_object.py.
        n.AddEdgeFrom(edge)

    self._node_dict[node_id] = n
    return edges_end

  @staticmethod
  def _IsNodeTypeUninteresting(type_string):
    """Helper function for filtering out nodes from the heap snapshot.

    Args:
      type_string: str, type of the node.

    Returns:
      bool, True if the node is of an uninteresting type and shouldn't be
      included in the heap snapshot analysis.
    """
    uninteresting_types = ('hidden', 'code', 'number', 'native', 'synthetic')
    return type_string in uninteresting_types

  @staticmethod
  def _IsEdgeTypeUninteresting(edge_type_string):
    """Helper function for filtering out edges from the heap snapshot.

    Args:
      edge_type_string: str, type of the edge.

    Returns:
      bool, True if the edge is of an uninteresting type and shouldn't be
      included in the heap snapshot analysis.
    """
    uninteresting_types = ('weak', 'hidden', 'internal')
    return edge_type_string in uninteresting_types

  def _ReadEdgeFromIndex(self, node_id, edge_ix):
    """Reads the data for an edge from the heap snapshot.

    Args:
      node_id: int, id of the node which is the starting point of the edge.
      edge_ix: int, index into the self._edge_list array.

    Returns:
      Edge, if the index contains an interesting edge, otherwise None.

    Raises:
      Exception: The edge list of the snapshot is malformed.
    """
    if edge_ix + self._edge_field_count > len(self._edge_list):
      raise Exception('Snapshot edge list too short')

    edge_type_ix = self._edge_list[edge_ix + self._edge_type_ix]
    edge_type_string = self._edge_types[int(edge_type_ix)]

    if ChromeJsHeapSnapshotParser._IsEdgeTypeUninteresting(edge_type_string):
      return None

    child_name_or_ix = self._edge_list[edge_ix + self._edge_name_or_ix_ix]
    child_node_ix = self._edge_list[edge_ix + self._edge_to_node_ix]

    # The child_node_ix is an index into the node list. Read the actual
    # node information.
    child_node_type_ix = self._node_list[child_node_ix + self._node_type_ix]
    child_node_type_string = self._node_types[int(child_node_type_ix)]
    child_node_id = self._node_list[child_node_ix + self._node_id_ix]

    # An edge to a filtered-out node is dropped as well, so that every
    # remaining edge points to a node that ends up in self._node_dict.
    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(
        child_node_type_string):
      return None

    # For element nodes, the child has no name (only an index). A value that
    # does not fit the string table is treated as a plain number too.
    if (edge_type_string == 'element' or
        int(child_name_or_ix) >= len(self._strings)):
      child_name_string = str(child_name_or_ix)
    else:
      child_name_string = self._strings[int(child_name_or_ix)]
    return retaining_edge.RetainingEdge(node_id, child_node_id,
                                        edge_type_string, child_name_string)