From 23880790515b91d1c9460f422c53e4abeab22ff6 Mon Sep 17 00:00:00 2001 From: "mikhail.naganov@gmail.com" Date: Tue, 14 Sep 2010 11:49:06 +0000 Subject: [PATCH] Implement heap snapshots serialization into JSON. API is designed to avoid storing serialized snapshot on VM, instead it is emitted using output stream interface. The size of JSON emitted is roughly equal to used heap size (when stored as an ASCII string). Now a whole heap snapshot can be serialized and transmitted outside VM. This makes possible: - implementing non-async UI for heap snapshots inspection; - storing heap snapshots for further inspection; - remote profiling (we can even implement a snapshotting mode where a snapshot isn't even stored in VM, only transmitted -- good for mobile devices); - creating tools for outside heap snapshots processing, e.g. converting to HPROF. Review URL: http://codereview.chromium.org/3311028 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5450 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- include/v8-profiler.h | 29 +++- include/v8.h | 20 +++ src/api.cc | 17 ++ src/profile-generator.cc | 328 ++++++++++++++++++++++++++++++++++++++ src/profile-generator.h | 47 ++++++ src/unicode.h | 6 +- test/cctest/test-heap-profiler.cc | 134 ++++++++++++++++ 7 files changed, 577 insertions(+), 4 deletions(-) diff --git a/include/v8-profiler.h b/include/v8-profiler.h index dd1b8ca..27da418 100644 --- a/include/v8-profiler.h +++ b/include/v8-profiler.h @@ -323,7 +323,10 @@ class V8EXPORT HeapSnapshot { enum Type { kFull = 0, // Heap snapshot with all instances and references. kAggregated = 1 // Snapshot doesn't contain individual heap entries, - //instead they are grouped by constructor name. + // instead they are grouped by constructor name. + }; + enum SerializationFormat { + kJSON = 0 // See format description near 'Serialize' method. }; /** Returns heap snapshot type. */ @@ -343,6 +346,30 @@ class V8EXPORT HeapSnapshot { * of the same type can be compared. 
*/ const HeapSnapshotsDiff* CompareWith(const HeapSnapshot* snapshot) const; + + /** + * Prepare a serialized representation of the snapshot. The result + * is written into the stream provided in chunks of specified size. + * The total length of the serialized snapshot is unknown in + * advance, it can be roughly equal to JS heap size (that means, + * it can be really big - tens of megabytes). + * + * For the JSON format, heap contents are represented as an object + * with the following structure: + * + * { + * snapshot: {title: "...", uid: nnn}, + * nodes: [ + * meta-info (JSON string), + * nodes themselves + * ], + * strings: [strings] + * } + * + * Outgoing node links are stored after each node. Nodes reference strings + * and other nodes by their indexes in corresponding arrays. + */ + void Serialize(OutputStream* stream, SerializationFormat format) const; }; diff --git a/include/v8.h b/include/v8.h index b89c244..03bc401 100644 --- a/include/v8.h +++ b/include/v8.h @@ -3196,6 +3196,26 @@ class V8EXPORT Locker { }; +/** + * An interface for exporting data from V8, using "push" model. + */ +class V8EXPORT OutputStream { +public: + enum OutputEncoding { + kAscii = 0 // 7-bit ASCII. + }; + virtual ~OutputStream() {} + /** Notify about the end of stream. */ + virtual void EndOfStream() = 0; + /** Get preferred output chunk size. Called only once. */ + virtual int GetChunkSize() { return 1024; } + /** Get preferred output encoding. Called only once. */ + virtual OutputEncoding GetOutputEncoding() { return kAscii; } + /** Writes the next chunk of snapshot data into the stream. 
*/ + virtual void WriteAsciiChunk(char* data, int size) = 0; +}; + + // --- I m p l e m e n t a t i o n --- diff --git a/src/api.cc b/src/api.cc index 0d01fcc..e09d4c9 100644 --- a/src/api.cc +++ b/src/api.cc @@ -4739,6 +4739,23 @@ const HeapSnapshotsDiff* HeapSnapshot::CompareWith( } +void HeapSnapshot::Serialize(OutputStream* stream, + HeapSnapshot::SerializationFormat format) const { + IsDeadCheck("v8::HeapSnapshot::Serialize"); + ApiCheck(format == kJSON, + "v8::HeapSnapshot::Serialize", + "Unknown serialization format"); + ApiCheck(stream->GetOutputEncoding() == OutputStream::kAscii, + "v8::HeapSnapshot::Serialize", + "Unsupported output encoding"); + ApiCheck(stream->GetChunkSize() > 0, + "v8::HeapSnapshot::Serialize", + "Invalid stream chunk size"); + i::HeapSnapshotJSONSerializer serializer(ToInternal(this)); + serializer.Serialize(stream); +} + + int HeapProfiler::GetSnapshotsCount() { IsDeadCheck("v8::HeapProfiler::GetSnapshotsCount"); return i::HeapProfiler::GetSnapshotsCount(); diff --git a/src/profile-generator.cc b/src/profile-generator.cc index 2de7a2f..d974001 100644 --- a/src/profile-generator.cc +++ b/src/profile-generator.cc @@ -31,6 +31,7 @@ #include "global-handles.h" #include "scopeinfo.h" #include "top.h" +#include "unicode.h" #include "zone-inl.h" #include "profile-generator-inl.h" @@ -2132,6 +2133,333 @@ HeapSnapshotsDiff* HeapSnapshotsComparator::Compare(HeapSnapshot* snapshot1, return diff; } + +class OutputStreamWriter { + public: + explicit OutputStreamWriter(v8::OutputStream* stream) + : stream_(stream), + chunk_size_(stream->GetChunkSize()), + chunk_(chunk_size_), + chunk_pos_(0) { + ASSERT(chunk_size_ > 0); + } + void AddCharacter(char c) { + ASSERT(c != '\0'); + ASSERT(chunk_pos_ < chunk_size_); + chunk_[chunk_pos_++] = c; + MaybeWriteChunk(); + } + void AddString(const char* s) { + AddSubstring(s, StrLength(s)); + } + void AddSubstring(const char* s, int n) { + if (n <= 0) return; + ASSERT(static_cast(n) <= strlen(s)); + const 
char* s_end = s + n; + while (s < s_end) { + int s_chunk_size = Min( + chunk_size_ - chunk_pos_, static_cast(s_end - s)); + ASSERT(s_chunk_size > 0); + memcpy(chunk_.start() + chunk_pos_, s, s_chunk_size); + s += s_chunk_size; + chunk_pos_ += s_chunk_size; + MaybeWriteChunk(); + } + } + void AddNumber(int n) { AddNumberImpl(n, "%d"); } + void AddNumber(unsigned n) { AddNumberImpl(n, "%u"); } + void AddNumber(uint64_t n) { AddNumberImpl(n, "%llu"); } + void Finalize() { + ASSERT(chunk_pos_ < chunk_size_); + if (chunk_pos_ != 0) { + WriteChunk(); + } + stream_->EndOfStream(); + } + + private: + template + void AddNumberImpl(T n, const char* format) { + ScopedVector buffer(32); + int result = OS::SNPrintF(buffer, format, n); + USE(result); + ASSERT(result != -1); + AddString(buffer.start()); + } + void MaybeWriteChunk() { + ASSERT(chunk_pos_ <= chunk_size_); + if (chunk_pos_ == chunk_size_) { + WriteChunk(); + chunk_pos_ = 0; + } + } + void WriteChunk() { + stream_->WriteAsciiChunk(chunk_.start(), chunk_pos_); + } + + v8::OutputStream* stream_; + int chunk_size_; + ScopedVector chunk_; + int chunk_pos_; +}; + +void HeapSnapshotJSONSerializer::Serialize(v8::OutputStream* stream) { + ASSERT(writer_ == NULL); + writer_ = new OutputStreamWriter(stream); + + // Since nodes graph is cyclic, we need the first pass to enumerate + // them. Strings can be serialized in one pass. 
+ EnumerateNodes(); + + writer_->AddCharacter('{'); + writer_->AddString("\"snapshot\":{"); + SerializeSnapshot(); + writer_->AddString("},\n"); + writer_->AddString("\"nodes\":["); + SerializeNodes(); + writer_->AddString("],\n"); + writer_->AddString("\"strings\":["); + SerializeStrings(); + writer_->AddCharacter(']'); + writer_->AddCharacter('}'); + writer_->Finalize(); + + delete writer_; + writer_ = NULL; +} + + +class HeapSnapshotJSONSerializerEnumerator { + public: + explicit HeapSnapshotJSONSerializerEnumerator(HeapSnapshotJSONSerializer* s) + : s_(s) { + } + void Apply(HeapEntry** entry) { + s_->GetNodeId(*entry); + } + private: + HeapSnapshotJSONSerializer* s_; +}; + +void HeapSnapshotJSONSerializer::EnumerateNodes() { + GetNodeId(snapshot_->root()); // Make sure root gets the first id. + HeapSnapshotJSONSerializerEnumerator iter(this); + snapshot_->IterateEntries(&iter); +} + + +int HeapSnapshotJSONSerializer::GetNodeId(HeapEntry* entry) { + HashMap::Entry* cache_entry = nodes_.Lookup(entry, ObjectHash(entry), true); + if (cache_entry->value == NULL) { + cache_entry->value = reinterpret_cast(next_node_id_++); + } + return static_cast(reinterpret_cast(cache_entry->value)); +} + + +int HeapSnapshotJSONSerializer::GetStringId(const char* s) { + HashMap::Entry* cache_entry = strings_.Lookup( + const_cast(s), ObjectHash(s), true); + if (cache_entry->value == NULL) { + cache_entry->value = reinterpret_cast(next_string_id_++); + } + return static_cast(reinterpret_cast(cache_entry->value)); +} + + +void HeapSnapshotJSONSerializer::SerializeEdge(HeapGraphEdge* edge) { + writer_->AddCharacter(','); + writer_->AddNumber(edge->type()); + writer_->AddCharacter(','); + if (edge->type() == HeapGraphEdge::kElement) { + writer_->AddNumber(edge->index()); + } else { + writer_->AddNumber(GetStringId(edge->name())); + } + writer_->AddCharacter(','); + writer_->AddNumber(GetNodeId(edge->to())); +} + + +void HeapSnapshotJSONSerializer::SerializeNode(HeapEntry* entry) { + 
writer_->AddCharacter('\n'); + writer_->AddCharacter(','); + writer_->AddNumber(entry->type()); + writer_->AddCharacter(','); + writer_->AddNumber(GetStringId(entry->name())); + writer_->AddCharacter(','); + writer_->AddNumber(entry->id()); + writer_->AddCharacter(','); + writer_->AddNumber(entry->self_size()); + Vector children = entry->children(); + writer_->AddCharacter(','); + writer_->AddNumber(children.length()); + for (int i = 0; i < children.length(); ++i) { + SerializeEdge(&children[i]); + } +} + + +void HeapSnapshotJSONSerializer::SerializeNodes() { + // The first (zero) item of nodes array is a JSON-ified object + // describing node serialization layout. + // We use a set of macros to improve readability. +#define JSON_A(s) "["s"]" +#define JSON_O(s) "{"s"}" +#define JSON_S(s) "\\\""s"\\\"" + writer_->AddString("\"" JSON_O( + JSON_S("fields") ":" JSON_A( + JSON_S("type") + "," JSON_S("name") + "," JSON_S("id") + "," JSON_S("self_size") + "," JSON_S("children_count") + "," JSON_S("children")) + "," JSON_S("types") ":" JSON_A( + JSON_A( + JSON_S("internal") + "," JSON_S("array") + "," JSON_S("string") + "," JSON_S("object") + "," JSON_S("code") + "," JSON_S("closure")) + "," JSON_S("string") + "," JSON_S("number") + "," JSON_S("number") + "," JSON_S("number") + "," JSON_O( + JSON_S("fields") ":" JSON_A( + JSON_S("type") + "," JSON_S("name_or_index") + "," JSON_S("to_node")) + "," JSON_S("types") ":" JSON_A( + JSON_A( + JSON_S("context") + "," JSON_S("element") + "," JSON_S("property") + "," JSON_S("internal")) + "," JSON_S("string_or_number") + "," JSON_S("node"))))) "\""); +#undef JSON_S +#undef JSON_O +#undef JSON_A + + const int node_fields_count = 5; // type,name,id,self_size,children_count. + const int edge_fields_count = 3; // type,name|index,to_node. + List sorted_nodes; + SortHashMap(&nodes_, &sorted_nodes); + // Rewrite node ids, so they refer to actual array positions. + if (sorted_nodes.length() > 1) { + // Nodes start from array index 1. 
+ int prev_value = 1; + sorted_nodes[0]->value = reinterpret_cast(prev_value); + for (int i = 1; i < sorted_nodes.length(); ++i) { + HeapEntry* prev_heap_entry = + reinterpret_cast(sorted_nodes[i-1]->key); + prev_value += node_fields_count + + prev_heap_entry->children().length() * edge_fields_count; + sorted_nodes[i]->value = reinterpret_cast(prev_value); + } + } + for (int i = 0; i < sorted_nodes.length(); ++i) { + SerializeNode(reinterpret_cast(sorted_nodes[i]->key)); + } +} + + +void HeapSnapshotJSONSerializer::SerializeSnapshot() { + writer_->AddString("\"title\":\""); + writer_->AddString(snapshot_->title()); + writer_->AddString("\""); + writer_->AddString(",\"uid\":"); + writer_->AddNumber(snapshot_->uid()); +} + + +static void WriteUChar(OutputStreamWriter* w, unibrow::uchar u) { + static const char hex_chars[] = "0123456789ABCDEF"; + w->AddString("\\u"); + w->AddCharacter(hex_chars[(u >> 12) & 0xf]); + w->AddCharacter(hex_chars[(u >> 8) & 0xf]); + w->AddCharacter(hex_chars[(u >> 4) & 0xf]); + w->AddCharacter(hex_chars[u & 0xf]); +} + +void HeapSnapshotJSONSerializer::SerializeString(const unsigned char* s) { + writer_->AddCharacter('\n'); + writer_->AddCharacter('\"'); + for ( ; *s != '\0'; ++s) { + switch (*s) { + case '\b': + writer_->AddString("\\b"); + continue; + case '\f': + writer_->AddString("\\f"); + continue; + case '\n': + writer_->AddString("\\n"); + continue; + case '\r': + writer_->AddString("\\r"); + continue; + case '\t': + writer_->AddString("\\t"); + continue; + case '\"': + case '\\': + writer_->AddCharacter('\\'); + writer_->AddCharacter(*s); + continue; + default: + if (*s > 31 && *s < 128) { + writer_->AddCharacter(*s); + } else if (*s <= 31) { + // Special character with no dedicated literal. + WriteUChar(writer_, *s); + } else { + // Convert UTF-8 into \u UTF-16 literal. 
+ unsigned length = 1, cursor = 0; + for ( ; length <= 4 && *(s + length) != '\0'; ++length) { } + unibrow::uchar c = unibrow::Utf8::CalculateValue(s, length, &cursor); + if (c != unibrow::Utf8::kBadChar) { + WriteUChar(writer_, c); + ASSERT(cursor != 0); + s += cursor - 1; + } else { + writer_->AddCharacter('?'); + } + } + } + } + writer_->AddCharacter('\"'); +} + + +void HeapSnapshotJSONSerializer::SerializeStrings() { + List sorted_strings; + SortHashMap(&strings_, &sorted_strings); + writer_->AddString("\"\""); + for (int i = 0; i < sorted_strings.length(); ++i) { + writer_->AddCharacter(','); + SerializeString( + reinterpret_cast(sorted_strings[i]->key)); + } +} + + +template +inline static int SortUsingEntryValue(const T* x, const T* y) { + return reinterpret_cast((*x)->value) - + reinterpret_cast((*y)->value); +} + +void HeapSnapshotJSONSerializer::SortHashMap( + HashMap* map, List* sorted_entries) { + for (HashMap::Entry* p = map->Start(); p != NULL; p = map->Next(p)) + sorted_entries->Add(p); + sorted_entries->Sort(SortUsingEntryValue); +} + } } // namespace v8::internal #endif // ENABLE_LOGGING_AND_PROFILING diff --git a/src/profile-generator.h b/src/profile-generator.h index c6d6f4c..e5d7f2d 100644 --- a/src/profile-generator.h +++ b/src/profile-generator.h @@ -976,6 +976,53 @@ class HeapSnapshotGenerator { DISALLOW_COPY_AND_ASSIGN(HeapSnapshotGenerator); }; +class OutputStreamWriter; + +class HeapSnapshotJSONSerializer { + public: + explicit HeapSnapshotJSONSerializer(HeapSnapshot* snapshot) + : snapshot_(snapshot), + nodes_(ObjectsMatch), + strings_(ObjectsMatch), + next_node_id_(1), + next_string_id_(1), + writer_(NULL) { + } + void Serialize(v8::OutputStream* stream); + + private: + INLINE(static bool ObjectsMatch(void* key1, void* key2)) { + return key1 == key2; + } + + INLINE(static uint32_t ObjectHash(const void* key)) { + return static_cast(reinterpret_cast(key)); + } + + void EnumerateNodes(); + int GetNodeId(HeapEntry* entry); + int 
GetStringId(const char* s); + void SerializeEdge(HeapGraphEdge* edge); + void SerializeNode(HeapEntry* entry); + void SerializeNodes(); + void SerializeSnapshot(); + void SerializeString(const unsigned char* s); + void SerializeStrings(); + void SortHashMap(HashMap* map, List* sorted_entries); + + HeapSnapshot* snapshot_; + HashMap nodes_; + HashMap strings_; + int next_node_id_; + int next_string_id_; + OutputStreamWriter* writer_; + + friend class HeapSnapshotJSONSerializerEnumerator; + friend class HeapSnapshotJSONSerializerIterator; + + DISALLOW_COPY_AND_ASSIGN(HeapSnapshotJSONSerializer); +}; + } } // namespace v8::internal #endif // ENABLE_LOGGING_AND_PROFILING diff --git a/src/unicode.h b/src/unicode.h index a3b799e..9d1d683 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -120,6 +120,9 @@ class Utf8 { static inline unsigned Encode(char* out, uchar c); static const byte* ReadBlock(Buffer str, byte* buffer, unsigned capacity, unsigned* chars_read, unsigned* offset); + static uchar CalculateValue(const byte* str, + unsigned length, + unsigned* cursor); static const uchar kBadChar = 0xFFFD; static const unsigned kMaxEncodedSize = 4; static const unsigned kMaxOneByteChar = 0x7f; @@ -133,9 +136,6 @@ class Utf8 { static inline uchar ValueOf(const byte* str, unsigned length, unsigned* cursor); - static uchar CalculateValue(const byte* str, - unsigned length, - unsigned* cursor); }; // --- C h a r a c t e r S t r e a m --- diff --git a/test/cctest/test-heap-profiler.cc b/test/cctest/test-heap-profiler.cc index 6dc49c0..2bc52db 100644 --- a/test/cctest/test-heap-profiler.cc +++ b/test/cctest/test-heap-profiler.cc @@ -989,4 +989,138 @@ TEST(AggregatedHeapSnapshot) { CHECK(IsNodeRetainedAs(a_from_b, 1)); // B has 1 ref to A. 
} +namespace { + +class TestJSONStream : public v8::OutputStream { + public: + TestJSONStream() : eos_signaled_(0) {} + virtual ~TestJSONStream() {} + virtual void EndOfStream() { ++eos_signaled_; } + virtual void WriteAsciiChunk(char* buffer, int chars_written) { + CHECK_GT(chars_written, 0); + i::Vector chunk = buffer_.AddBlock(chars_written, '\0'); + memcpy(chunk.start(), buffer, chars_written); + } + void WriteTo(i::Vector dest) { buffer_.WriteTo(dest); } + int eos_signaled() { return eos_signaled_; } + int size() { return buffer_.size(); } + private: + i::Collector buffer_; + int eos_signaled_; +}; + +class AsciiResource: public v8::String::ExternalAsciiStringResource { + public: + explicit AsciiResource(i::Vector string): data_(string.start()) { + length_ = string.length(); + } + virtual const char* data() const { return data_; } + virtual size_t length() const { return length_; } + private: + const char* data_; + size_t length_; +}; + +} // namespace + +TEST(HeapSnapshotJSONSerialization) { + v8::HandleScope scope; + LocalContext env; + +#define STRING_LITERAL_FOR_TEST \ + "\"String \\n\\r\\u0008\\u0081\\u0101\\u0801\\u8001\"" + CompileAndRunScript( + "function A(s) { this.s = s; }\n" + "function B(x) { this.x = x; }\n" + "var a = new A(" STRING_LITERAL_FOR_TEST ");\n" + "var b = new B(a);"); + const v8::HeapSnapshot* snapshot = + v8::HeapProfiler::TakeSnapshot(v8::String::New("json")); + TestJSONStream stream; + snapshot->Serialize(&stream, v8::HeapSnapshot::kJSON); + CHECK_GT(stream.size(), 0); + CHECK_EQ(1, stream.eos_signaled()); + i::ScopedVector json(stream.size()); + stream.WriteTo(json); + + // Verify that snapshot string is valid JSON. 
+ AsciiResource json_res(json); + v8::Local json_string = v8::String::NewExternal(&json_res); + env->Global()->Set(v8::String::New("json_snapshot"), json_string); + v8::Local snapshot_parse_result = CompileRun( + "var parsed = JSON.parse(json_snapshot); true;"); + CHECK(!snapshot_parse_result.IsEmpty()); + + // Verify that snapshot object has required fields. + v8::Local parsed_snapshot = + env->Global()->Get(v8::String::New("parsed"))->ToObject(); + CHECK(parsed_snapshot->Has(v8::String::New("snapshot"))); + CHECK(parsed_snapshot->Has(v8::String::New("nodes"))); + CHECK(parsed_snapshot->Has(v8::String::New("strings"))); + + // Verify that nodes meta-info is valid JSON. + v8::Local nodes_meta_parse_result = CompileRun( + "var parsed_meta = JSON.parse(parsed.nodes[0]); true;"); + CHECK(!nodes_meta_parse_result.IsEmpty()); + + // Get node and edge "member" offsets. + v8::Local meta_analysis_result = CompileRun( + "var children_count_offset =" + " parsed_meta.fields.indexOf('children_count');\n" + "var children_offset =" + " parsed_meta.fields.indexOf('children');\n" + "var children_meta =" + " parsed_meta.types[children_offset];\n" + "var child_fields_count = children_meta.fields.length;\n" + "var child_type_offset =" + " children_meta.fields.indexOf('type');\n" + "var child_name_offset =" + " children_meta.fields.indexOf('name_or_index');\n" + "var child_to_node_offset =" + " children_meta.fields.indexOf('to_node');\n" + "var property_type =" + " children_meta.types[child_type_offset].indexOf('property');"); + CHECK(!meta_analysis_result.IsEmpty()); + + // A helper function for processing encoded nodes. 
+ CompileRun( + "function GetChildPosByProperty(pos, prop_name) {\n" + " var nodes = parsed.nodes;\n" + " var strings = parsed.strings;\n" + " for (var i = 0,\n" + " count = nodes[pos + children_count_offset] * child_fields_count;\n" + " i < count; i += child_fields_count) {\n" + " var child_pos = pos + children_offset + i;\n" + " if (nodes[child_pos + child_type_offset] === property_type\n" + " && strings[nodes[child_pos + child_name_offset]] === prop_name)\n" + " return nodes[child_pos + child_to_node_offset];\n" + " }\n" + " return null;\n" + "}\n"); + // Get the string index using the path: -> .b.x.s + v8::Local string_obj_pos_val = CompileRun( + "GetChildPosByProperty(\n" + " GetChildPosByProperty(\n" + " GetChildPosByProperty(" + " parsed.nodes[1 + children_offset + child_to_node_offset],\"b\"),\n" + " \"x\")," + " \"s\")"); + CHECK(!string_obj_pos_val.IsEmpty()); + int string_obj_pos = + static_cast(string_obj_pos_val->ToNumber()->Value()); + v8::Local nodes_array = + parsed_snapshot->Get(v8::String::New("nodes"))->ToObject(); + int string_index = static_cast( + nodes_array->Get(string_obj_pos + 1)->ToNumber()->Value()); + CHECK_GT(string_index, 0); + v8::Local strings_array = + parsed_snapshot->Get(v8::String::New("strings"))->ToObject(); + v8::Local string = strings_array->Get(string_index)->ToString(); + v8::Local ref_string = + CompileRun(STRING_LITERAL_FOR_TEST)->ToString(); +#undef STRING_LITERAL_FOR_TEST + CHECK_EQ(*v8::String::Utf8Value(ref_string), + *v8::String::Utf8Value(string)); +} + #endif // ENABLE_LOGGING_AND_PROFILING -- 2.7.4