#!/usr/bin/env python

#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools.  Most importantly, this tool emits constants describing V8
# internals:
#
#     v8dbg_type_CLASS__TYPE = VALUE          Describes class type values
#     v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
#     v8dbg_parent_CLASS__PARENT              Describes class hierarchy
#     v8dbg_frametype_NAME = VALUE            Describes stack frame values
#     v8dbg_off_fp_NAME = OFFSET              Frame pointer offsets
#     v8dbg_prop_NAME = OFFSET                Object property offsets
#     v8dbg_NAME = VALUE                      Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#
# Usage:
#
#     gen-postmortem-metadata.py output.cc objects.h objects-inl.h
#
# The build runs this script from the "postmortem-metadata" gyp action with
# src/objects.h and src/objects-inl.h as inputs, writing debug-support.cc,
# which is compiled into libv8 when v8_postmortem_support is "true".
#

import re
import sys

#
# Miscellaneous constants, tags, and masks used for object identification.
#
consts_misc = [
    { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },

    { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
    { 'name': 'StringTag',              'value': 'kStringTag' },
    { 'name': 'NotStringTag',           'value': 'kNotStringTag' },

    { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
    { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
    { 'name': 'AsciiStringTag',         'value': 'kAsciiStringTag' },

    { 'name': 'StringRepresentationMask',
        'value': 'kStringRepresentationMask' },
    { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
    { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
    { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },

    { 'name': 'FailureTag',             'value': 'kFailureTag' },
    { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
    { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
    { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
    { 'name': 'SmiTag',                 'value': 'kSmiTag' },
    { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
    { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
    { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },

    { 'name': 'prop_idx_content',
        'value': 'DescriptorArray::kContentArrayIndex' },
    { 'name': 'prop_idx_first',
        'value': 'DescriptorArray::kFirstIndex' },
    { 'name': 'prop_type_field',
        'value': 'FIELD' },
    { 'name': 'prop_type_first_phantom',
        'value': 'MAP_TRANSITION' },
    { 'name': 'prop_type_mask',
        'value': 'PropertyDetails::TypeField::kMask' },

    { 'name': 'off_fp_context',
        'value': 'StandardFrameConstants::kContextOffset' },
    { 'name': 'off_fp_marker',
        'value': 'StandardFrameConstants::kMarkerOffset' },
    { 'name': 'off_fp_function',
        'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    { 'name': 'off_fp_args',
        'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
]

#
# The following useful fields are missing accessors, so we define fake ones.
# Each entry is the argument list of an ACCESSORS(...) invocation:
# "Class, field, Type, kOffsetConstant".
#
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',
]

#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqAsciiString', 'SharedFunctionInfo'
]


#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
types = {}              # set of all type names (name -> True)
typeclasses = {}        # maps type names to corresponding class names
klasses = {}            # known classes, including parents
fields = []             # field declarations

#
# Preamble of the generated C++ file.  Note that the backslash-newline inside
# this (non-raw) string is consumed by Python, so the FRAME_CONST macro is
# emitted on a single line.
#
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

footer = '''
}
'''

# Patterns used while scanning objects.h (raw strings so that "\w" and "\s"
# reach the regex engine instead of being treated as string escapes).
_LINE_COMMENT_RE = re.compile(r'//.*')
_CLASS_DECL_RE = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*\{')
_ENUM_INITIALIZER_RE = re.compile(r'\s*=.*')


def _class_for_type(typename):
    """Return the class name inferred for instance type `typename`.

    The name is derived by a systematic transformation, for example
    "JS_FUNCTION_TYPE" becomes "JSFunction".  Returns None when the type name
    does not end in "_TYPE" or when the inferred class is not in `klasses`.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").
    #
    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # part[:1] rather than part[0] so an empty component cannot raise.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    #
    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii).  In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from the
    # type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #       CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present.  If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    if cctype.endswith('String'):
        has_representation = any(
            rep in cctype for rep in ('Cons', 'External', 'Sliced'))
        if not has_representation:
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    # Despite all that, some types have no corresponding class.
    if cctype in klasses:
        return cctype
    return None


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and populates the module-level
    `klasses` (class name -> {'parent': parent name or None}), `types`
    (InstanceType enumerator names) and `typeclasses` (type name -> class
    name).  If any class listed in `expected_classes` ends up without a
    matching type, an error line is printed for each one and the process
    exits with status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the pieces of the entire InstanceType enum
    # definition and parse them later because it's easier to do so without
    # the embedded newlines.
    #
    enum_chunks = []
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = _LINE_COMMENT_RE.sub('', line.strip())

            if in_insttype:
                enum_chunks.append(line)
                continue

            match = _CLASS_DECL_RE.match(line)
            if match:
                klasses[match.group(1)] = { 'parent': match.group(3) }

    # Process the instance type declaration: drop "= value" initializers.
    for entry in ''.join(enum_chunks).split(','):
        name = _ENUM_INITIALIZER_RE.sub('', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type.  We find the class for each type
    # rather than the other way around because there are fewer cases where
    # one type maps to more than one class than the other way around.
    #
    missing = set(expected_classes)
    for typename in types:
        cctype = _class_for_type(typename)
        if cctype is None:
            continue

        typeclasses[typename] = cctype
        missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            print('error: expected class "%s" not found' % klass)

        sys.exit(1)


def parse_field(call):
    """Pick apart one accessor macro invocation.

    `call` is the full text of an ACCESSORS(...), ACCESSORS_GCSAFE(...) or
    SMI_ACCESSORS(...) invocation, possibly spanning several lines.  Returns
    a dict with 'name' and 'value' keys describing the corresponding output
    constant.  See load_fields().

    Raises ValueError if the macro kind is not recognized or the invocation
    has too few arguments.
    """
    # Replace newlines with spaces.  (Strings are immutable, so this has to
    # build a new string rather than assign to individual characters.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx].strip()
    rest = call[idx + 1: len(call) - 1]

    # Strip each argument so whitespace after "(" or around commas never
    # leaks into the generated identifier.
    args = [arg.strip() for arg in rest.split(',')]

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        if len(args) < 4:
            raise ValueError('malformed %s invocation: %r' % (kind, call))

        klass, field, dtype, offset = args[0:4]
    elif kind == 'SMI_ACCESSORS':
        if len(args) < 3:
            raise ValueError('malformed %s invocation: %r' % (kind, call))

        klass, field, offset = args[0:3]
        dtype = 'SMI'
    else:
        raise ValueError('unexpected accessor macro: %r' % call)

    return ({
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    })


def _scan_parens(line, start, depth):
    """Track parenthesis nesting through line[start:].

    `depth` is the nesting depth before `start`.  Returns (end, depth) where
    `end` is the index just past the character at which the depth reached
    zero, or len(line) if the invocation continues on the next line.
    """
    for pos in range(start, len(line)):
        if line[pos] == '(':
            depth += 1
        elif line[pos] == ')':
            depth -= 1

        if depth == 0:
            return (pos + 1, depth)

    return (len(line), depth)


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3] and appends one parse_field() result
    to the module-level `fields` list for every accessor macro invocation
    found, followed by one for each entry of `extras_accessors`.
    """
    inlfilename = sys.argv[3]

    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends).  All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses.  We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ]
    current = ''
    opens = 0

    with open(inlfilename, 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of a multi-line invocation.
                end, opens = _scan_parens(line, 0, opens)
                current += line[0:end]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; flush the previous one.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                end, opens = _scan_parens(line, len(prefix), opens)
                current += line[0:end]

                # "PREFIX(" can match at most one of the prefixes.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


def emit_set(out, consts):
    """Emit a block of constants.

    Writes one "int v8dbg_NAME = VALUE;" line to the file-like object `out`
    for each dict in `consts` (keys 'name' and 'value'), then a blank line.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')


def emit_config():
    """Emit the whole output file to the path given by sys.argv[1].

    Writes the header, the miscellaneous constants, the type and class
    hierarchy constants collected by load_objects(), the field constants
    collected by load_fields(), and the footer.
    """
    # open()/sorted() instead of file()/keys().sort() so this also runs on
    # Python 3; "with" makes sure the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                continue

            # Only the symbol name carries information; the value is unused.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)


def main():
    """Command-line entry point; see the usage message for the arguments."""
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()


if __name__ == '__main__':
    main()