4 # Copyright 2012 the V8 project authors. All rights reserved.
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials provided
14 # with the distribution.
15 # * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived
17 # from this software without specific prior written permission.
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem
34 # debugging tools. Most importantly, this tool emits constants describing V8
37 # v8dbg_type_CLASS__TYPE = VALUE Describes class type values
38 # v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
39 # v8dbg_parent_CLASS__PARENT Describes class hierarchy
40 # v8dbg_frametype_NAME = VALUE Describes stack frame values
41 # v8dbg_off_fp_NAME = OFFSET Frame pointer offsets
42 # v8dbg_prop_NAME = OFFSET Object property offsets
43 # v8dbg_NAME = VALUE Miscellaneous values
45 # These constants are declared as global integers so that they'll be present in
46 # the generated libv8 binary.
53 # Miscellaneous constants, tags, and masks used for object identification.
56 { 'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE' },
58 { 'name': 'IsNotStringMask', 'value': 'kIsNotStringMask' },
59 { 'name': 'StringTag', 'value': 'kStringTag' },
60 { 'name': 'NotStringTag', 'value': 'kNotStringTag' },
62 { 'name': 'StringEncodingMask', 'value': 'kStringEncodingMask' },
63 { 'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag' },
64 { 'name': 'AsciiStringTag', 'value': 'kOneByteStringTag' },
66 { 'name': 'StringRepresentationMask',
67 'value': 'kStringRepresentationMask' },
68 { 'name': 'SeqStringTag', 'value': 'kSeqStringTag' },
69 { 'name': 'ConsStringTag', 'value': 'kConsStringTag' },
70 { 'name': 'ExternalStringTag', 'value': 'kExternalStringTag' },
71 { 'name': 'SlicedStringTag', 'value': 'kSlicedStringTag' },
73 { 'name': 'FailureTag', 'value': 'kFailureTag' },
74 { 'name': 'FailureTagMask', 'value': 'kFailureTagMask' },
75 { 'name': 'HeapObjectTag', 'value': 'kHeapObjectTag' },
76 { 'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask' },
77 { 'name': 'SmiTag', 'value': 'kSmiTag' },
78 { 'name': 'SmiTagMask', 'value': 'kSmiTagMask' },
79 { 'name': 'SmiValueShift', 'value': 'kSmiTagSize' },
80 { 'name': 'SmiShiftSize', 'value': 'kSmiShiftSize' },
81 { 'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2' },
83 { 'name': 'prop_idx_first',
84 'value': 'DescriptorArray::kFirstIndex' },
85 { 'name': 'prop_type_field',
87 { 'name': 'prop_type_first_phantom',
88 'value': 'TRANSITION' },
89 { 'name': 'prop_type_mask',
90 'value': 'PropertyDetails::TypeField::kMask' },
92 { 'name': 'prop_desc_key',
93 'value': 'DescriptorArray::kDescriptorKey' },
94 { 'name': 'prop_desc_details',
95 'value': 'DescriptorArray::kDescriptorDetails' },
96 { 'name': 'prop_desc_value',
97 'value': 'DescriptorArray::kDescriptorValue' },
98 { 'name': 'prop_desc_size',
99 'value': 'DescriptorArray::kDescriptorSize' },
101 { 'name': 'off_fp_context',
102 'value': 'StandardFrameConstants::kContextOffset' },
103 { 'name': 'off_fp_constant_pool',
104 'value': 'StandardFrameConstants::kConstantPoolOffset' },
105 { 'name': 'off_fp_marker',
106 'value': 'StandardFrameConstants::kMarkerOffset' },
107 { 'name': 'off_fp_function',
108 'value': 'JavaScriptFrameConstants::kFunctionOffset' },
109 { 'name': 'off_fp_args',
110 'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
114 # The following useful fields are missing accessors, so we define fake ones.
117 'HeapObject, map, Map, kMapOffset',
118 'JSObject, elements, Object, kElementsOffset',
119 'FixedArray, data, uintptr_t, kHeaderSize',
120 'Map, instance_attributes, int, kInstanceAttributesOffset',
121 'Map, inobject_properties, int, kInObjectPropertiesOffset',
122 'Map, instance_size, int, kInstanceSizeOffset',
123 'HeapNumber, value, double, kValueOffset',
124 'ConsString, first, String, kFirstOffset',
125 'ConsString, second, String, kSecondOffset',
126 'ExternalString, resource, Object, kResourceOffset',
127 'SeqOneByteString, chars, char, kHeaderSize',
128 'SeqTwoByteString, chars, char, kHeaderSize',
129 'SharedFunctionInfo, code, Code, kCodeOffset',
130 'SlicedString, parent, String, kParentOffset',
131 'Code, instruction_start, uintptr_t, kHeaderSize',
132 'Code, instruction_size, int, kInstructionSizeOffset',
136 # The following is a whitelist of classes we expect to find when scanning the
137 # source code. This list is not exhaustive, but it's still useful to identify
138 # when this script gets out of sync with the source. See load_objects().
141 'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
142 'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
143 'SeqOneByteString', 'SharedFunctionInfo'
148 # The following structures store high-level representations of the structures
149 # for which we're going to emit descriptive constants.
151 types = {}; # set of all type names
152 typeclasses = {}; # maps type names to corresponding class names
153 klasses = {}; # known classes, including parents
154 fields = []; # field declarations
158 * This file is generated by %s. Do not edit directly.
163 #include "frames-inl.h" /* for architecture-specific frame constants */
165 using namespace v8::internal;
169 /* stack frame constants */
170 #define FRAME_CONST(value, klass) \
171 int v8dbg_frametype_##klass = StackFrame::value;
173 STACK_FRAME_TYPE_LIST(FRAME_CONST)
184 # Loads class hierarchy and type information from "objects.h".
187 objfilename = sys.argv[2];
188 objfile = open(objfilename, 'r');
194 # Construct a dictionary for the classes we're sure should be present.
197 for klass in expected_classes:
198 checktypes[klass] = True;
201 # Iterate objects.h line-by-line to collect type and class information.
202 # For types, we accumulate a string representing the entire InstanceType
203 # enum definition and parse it later because it's easier to do so
204 # without the embedded newlines.
207 if (line.startswith('enum InstanceType {')):
211 if (in_insttype and line.startswith('};')):
215 line = re.sub('//.*', '', line.rstrip().lstrip());
221 match = re.match('class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{',
225 klass = match.group(1);
226 pklass = match.group(3);
227 klasses[klass] = { 'parent': pklass };
230 # Process the instance type declaration.
232 entries = typestr.split(',');
233 for entry in entries:
234 types[re.sub('\s*=.*', '', entry).lstrip()] = True;
237 # Infer class names for each type based on a systematic transformation.
238 # For example, "JS_FUNCTION_TYPE" becomes "JSFunction". We find the
239 # class for each type rather than the other way around because there are
240 # fewer cases where one type maps to more than one class than the other
245 # Symbols and Strings are implemented using the same classes.
247 usetype = re.sub('SYMBOL_', 'STRING_', type);
250 # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
252 usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype);
255 # Remove the "_TYPE" suffix and then convert to camel case,
256 # except that a "JS" prefix remains uppercase (as in
257 # "JS_FUNCTION_TYPE" => "JSFunction").
259 if (not usetype.endswith('_TYPE')):
262 usetype = usetype[0:len(usetype) - len('_TYPE')];
263 parts = usetype.split('_');
266 if (parts[0] == 'JS'):
273 for ii in range(start, len(parts)):
275 cctype += part[0].upper() + part[1:].lower();
278 # Mapping string types is more complicated. Both types and
279 # class names for Strings specify a representation (e.g., Seq,
280 # Cons, External, or Sliced) and an encoding (TwoByte or Ascii).
281 # In the simplest case, both of these are explicit in both
284 # EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
286 # However, either the representation or encoding can be omitted
287 # from the type name, in which case "Seq" and "TwoByte" are
290 # STRING_TYPE => SeqTwoByteString
292 # Additionally, sometimes the type name has more information
293 # than the class, as in:
295 # CONS_ASCII_STRING_TYPE => ConsString
297 # To figure this out dynamically, we first check for a
298 # representation and encoding and add them if they're not
299 # present. If that doesn't yield a valid class name, then we
300 # strip out the representation.
302 if (cctype.endswith('String')):
303 if (cctype.find('Cons') == -1 and
304 cctype.find('External') == -1 and
305 cctype.find('Sliced') == -1):
306 if (cctype.find('Ascii') != -1):
307 cctype = re.sub('AsciiString$',
308 'SeqOneByteString', cctype);
310 cctype = re.sub('String$',
311 'SeqString', cctype);
313 if (cctype.find('Ascii') == -1):
314 cctype = re.sub('String$', 'TwoByteString',
317 if (not (cctype in klasses)):
318 cctype = re.sub('Ascii', '', cctype);
319 cctype = re.sub('TwoByte', '', cctype);
322 # Despite all that, some types have no corresponding class.
324 if (cctype in klasses):
325 typeclasses[type] = cctype;
326 if (cctype in checktypes):
327 del checktypes[cctype];
329 if (len(checktypes) > 0):
330 for klass in checktypes:
331 print('error: expected class \"%s\" not found' % klass);
337 # For a given macro call, pick apart the arguments and return an object
338 # describing the corresponding output constant. See load_fields().
def parse_field(call):
    """Pick apart one accessor-macro invocation and describe its constant.

    ``call`` is the complete text of a macro call such as
    ``ACCESSORS(HeapObject, map, Map, kMapOffset)``; it may span several
    lines.  See load_fields().

    Returns a dict with two keys:
      'name'  -- 'class_<CLASS>__<FIELD>__<TYPE>' (TYPE is 'SMI' for
                 SMI_ACCESSORS)
      'value' -- '<CLASS>::<OFFSET>'

    Raises AssertionError if the macro is not ACCESSORS, ACCESSORS_GCSAFE
    or SMI_ACCESSORS.
    """
    # Replace newlines with spaces.  Strings are immutable, so build a new
    # one instead of poking at individual characters.
    call = call.replace('\n', ' ')

    # Everything before the first '(' is the macro name; everything between
    # that '(' and the final character (the closing ')') is the arguments.
    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:-1]
    args = re.split(r'\s*,\s*', rest)

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        # Argument order (class, field, type, offset) matches the
        # "Class, field, Type, kOffset" entries fed in via extras_accessors.
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]

        return {
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset)
        }

    assert kind == 'SMI_ACCESSORS', 'unexpected accessor macro: %r' % kind

    # NOTE(review): assumes SMI_ACCESSORS takes (class, field, offset) --
    # confirm against the macro definition in objects-inl.h.
    klass = args[0]
    field = args[1]
    offset = args[2]

    return {
        'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
        'value': '%s::%s' % (klass, offset)
    }
375 # Load field offset information from objects-inl.h.
378 inlfilename = sys.argv[3];
379 inlfile = open(inlfilename, 'r');
382 # Each class's fields and the corresponding offsets are described in the
383 # source by calls to macros like "ACCESSORS" (and friends). All we do
384 # here is extract these macro invocations, taking into account that they
385 # may span multiple lines and may contain nested parentheses. We also
386 # call parse_field() to pick apart the invocation.
388 prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ];
395 for ii in range(0, len(line)):
396 if (line[ii] == '('):
398 elif (line[ii] == ')'):
404 current += line[0:ii + 1];
407 for prefix in prefixes:
408 if (not line.startswith(prefix + '(')):
411 if (len(current) > 0):
412 fields.append(parse_field(current));
415 for ii in range(len(prefix), len(line)):
416 if (line[ii] == '('):
418 elif (line[ii] == ')'):
424 current += line[0:ii + 1];
426 if (len(current) > 0):
427 fields.append(parse_field(current));
430 for body in extras_accessors:
431 fields.append(parse_field('ACCESSORS(%s)' % body));
434 # Emit a block of constants.
def emit_set(out, consts):
    """Emit a block of constants as C++ global integer definitions.

    out    -- writable text stream; one line is written per constant.
    consts -- iterable of dicts, each with a 'name' and a 'value' entry.
              'value' may be a string expression or a number.

    Each constant becomes ``int v8dbg_<name> = <value>;``.
    """
    # Fix up overzealous parses.  This could be done inside the
    # parsers but as there are several, it's easiest to do it here:
    # all whitespace is stripped from both the name and the value.
    ws = re.compile(r'\s+')
    for const in consts:
        name = ws.sub('', const['name'])
        value = ws.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
447 # Emit the whole output file.
450 out = file(sys.argv[1], 'w');
454 out.write('/* miscellaneous constants */\n');
455 emit_set(out, consts_misc);
457 out.write('/* class type information */\n');
459 keys = typeclasses.keys();
461 for typename in keys:
462 klass = typeclasses[typename];
464 'name': 'type_%s__%s' % (klass, typename),
468 emit_set(out, consts);
470 out.write('/* class hierarchy information */\n');
472 keys = klasses.keys();
474 for klassname in keys:
475 pklass = klasses[klassname]['parent'];
480 'name': 'parent_%s__%s' % (klassname, pklass),
484 emit_set(out, consts);
486 out.write('/* field information */\n');
487 emit_set(out, fields);
491 if (len(sys.argv) < 4):
492 print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0]);