From: christian.plesner.hansen@gmail.com Date: Tue, 7 Oct 2008 10:10:03 +0000 (+0000) Subject: Calculate string hash during flattening and convert flat strings to X-Git-Tag: upstream/4.7.83~25238 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=17096cab78334502d67ce0a1323e56dcf17dbd67;p=platform%2Fupstream%2Fv8.git Calculate string hash during flattening and convert flat strings to symbols. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@456 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/heap.cc b/src/heap.cc index 475116577..34201996b 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -2251,6 +2251,16 @@ Object* Heap::LookupSymbol(String* string) { } +bool Heap::LookupSymbolIfExists(String* string, String** symbol) { + if (string->IsSymbol()) { + *symbol = string; + return true; + } + SymbolTable* table = SymbolTable::cast(symbol_table_); + return table->LookupSymbolIfExists(string, symbol); +} + + #ifdef DEBUG void Heap::ZapFromSpace() { ASSERT(HAS_HEAP_OBJECT_TAG(kFromSpaceZapValue)); diff --git a/src/heap.h b/src/heap.h index ee95e3a81..fb7c3be8e 100644 --- a/src/heap.h +++ b/src/heap.h @@ -528,6 +528,7 @@ class Heap : public AllStatic { return LookupSymbol(CStrVector(str)); } static Object* LookupSymbol(String* str); + static bool LookupSymbolIfExists(String* str, String** symbol); // Compute the matching symbol map for a string if possible. // NULL is returned if string is in new space or not flattened. diff --git a/src/objects-inl.h b/src/objects-inl.h index a612ba984..dda4dbbab 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -2088,6 +2088,65 @@ uint32_t String::Hash() { } +StringHasher::StringHasher(int length) + : length_(length), + raw_running_hash_(0), + array_index_(0), + is_array_index_(0 < length_ && length_ <= String::kMaxArrayIndexSize), + is_first_char_(true), + is_valid_(true) { } + + +bool StringHasher::has_trivial_hash() { + return length_ > String::kMaxMediumStringSize; +} + + +void StringHasher::AddCharacter(uc32 c) { + // Note: the Jenkins one-at-a-time hash function + raw_running_hash_ += c; + raw_running_hash_ += (raw_running_hash_ << 10); + raw_running_hash_ ^= (raw_running_hash_ >> 6); + // Incremental array index computation + if (is_array_index_) { + if (c < '0' || c > '9') { + is_array_index_ = false; + } else { + int d = c - '0'; + if (is_first_char_) { + is_first_char_ = false; + if (c == '0' && length_ > 1) { + is_array_index_ = false; + return; + } + } + if (array_index_ > 429496729U - ((d + 2) >> 3)) { + is_array_index_ = false; + } else { + array_index_ = array_index_ * 10 + d; + } + } + } +} + + +void StringHasher::AddCharacterNoIndex(uc32 c) { + ASSERT(!is_array_index()); + raw_running_hash_ += c; + raw_running_hash_ += (raw_running_hash_ << 10); + raw_running_hash_ ^= (raw_running_hash_ >> 6); +} + + +uint32_t StringHasher::GetHash() { + uint32_t result = raw_running_hash_; + result += (result << 3); + result ^= (result >> 11); + result += (result << 15); + return result; +} + + bool String::AsArrayIndex(uint32_t* index) { uint32_t field = length_field(); if ((field & kHashComputedMask) && !(field & kIsArrayIndexMask)) return false; diff --git a/src/objects.cc b/src/objects.cc index b00eab50d..1b3e6d001 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -521,12 +521,23 @@ Object* String::Flatten() { // cons string is in old space. It can never get GCed until there is // an old space GC. PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED; + int len = length(); Object* object = IsAscii() ? - Heap::AllocateRawAsciiString(length(), tenure) : - Heap::AllocateRawTwoByteString(length(), tenure); + Heap::AllocateRawAsciiString(len, tenure) : + Heap::AllocateRawTwoByteString(len, tenure); if (object->IsFailure()) return object; String* result = String::cast(object); - Flatten(this, result, 0, length(), 0); + StringHasher hasher(len); + Flatten(this, result, 0, len, 0, &hasher); + if (hasher.is_valid()) { +#ifdef DEBUG + result->ComputeAndSetHash(); + ASSERT(result->length_field() == hasher.GetHashField()); +#else + result->set_length_field(hasher.GetHashField()); +#endif + Heap::LookupSymbolIfExists(result, &result); + } cs->set_first(result); cs->set_second(Heap::empty_string()); return this; @@ -3606,7 +3617,12 @@ Object* SlicedString::SlicedStringFlatten() { } -void String::Flatten(String* src, String* sink, int f, int t, int so) { +void String::Flatten(String* src, + String* sink, + int f, + int t, + int so, + StringHasher* hasher) { String* source = src; int from = f; int to = t; @@ -3621,7 +3637,10 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) { buffer->Reset(from, source); int j = sink_offset; for (int i = from; i < to; i++) { - sink->Set(j++, buffer->GetNext()); + uc32 c = buffer->GetNext(); + if (hasher->is_valid()) + hasher->AddCharacter(c); + sink->Set(j++, c); } return; } @@ -3637,10 +3656,10 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) { ConsString* cons_string = ConsString::cast(source); String* first = String::cast(cons_string->first()); int boundary = first->length(); - if (to - boundary > boundary - from) { + if (to - boundary >= boundary - from) { // Right hand side is longer. Recurse over left. if (from < boundary) { - Flatten(first, sink, from, boundary, sink_offset); + Flatten(first, sink, from, boundary, sink_offset, hasher); sink_offset += boundary - from; from = 0; } else { @@ -3649,14 +3668,18 @@ void String::Flatten(String* src, String* sink, int f, int t, int so) { to -= boundary; source = String::cast(cons_string->second()); } else { - // Left hand side is longer. Recurse over right. + // Left hand side is longer. Recurse over right. The hasher + // needs us to visit the string from left to right so doing + // this invalidates that hash. + hasher->invalidate(); if (to > boundary) { String* second = String::cast(cons_string->second()); Flatten(second, sink, 0, to - boundary, - sink_offset + boundary - from); + sink_offset + boundary - from, + hasher); to = boundary; } source = first; @@ -3812,39 +3835,45 @@ static inline uint32_t HashField(uint32_t hash, bool is_array_index) { } +uint32_t StringHasher::GetHashField() { + ASSERT(is_valid()); + if (length_ <= String::kMaxShortStringSize) { + uint32_t payload; + if (is_array_index()) { + payload = v8::internal::HashField(array_index(), true); + } else { + payload = v8::internal::HashField(GetHash(), false); + } + return (payload & 0x00FFFFFF) | (length_ << String::kShortLengthShift); + } else if (length_ <= String::kMaxMediumStringSize) { + uint32_t payload = v8::internal::HashField(GetHash(), false); + return (payload & 0x0000FFFF) | (length_ << String::kMediumLengthShift); + } else { + return v8::internal::HashField(length_, false); + } +} + + uint32_t String::ComputeLengthAndHashField(unibrow::CharacterStream* buffer, int length) { - // Large string (please note large strings cannot be an array index). - if (length > kMaxMediumStringSize) return HashField(length, false); + StringHasher hasher(length); - // Note: the Jenkins one-at-a-time hash function - uint32_t hash = 0; - while (buffer->has_more()) { - uc32 r = buffer->GetNext(); - hash += r; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - - // Short string. - if (length <= kMaxShortStringSize) { - // Make hash value consistent with value returned from String::Hash. - buffer->Rewind(); - uint32_t index; - hash = HashField(hash, ComputeArrayIndex(buffer, &index, length)); - hash = (hash & 0x00FFFFFF) | (length << kShortLengthShift); - return hash; - } + // Very long strings have a trivial hash that doesn't inspect the + // string contents. + if (hasher.has_trivial_hash()) + return hasher.GetHashField(); - // Medium string (please note medium strings cannot be an array index). - ASSERT(length <= kMaxMediumStringSize); - // Make hash value consistent with value returned from String::Hash. - hash = HashField(hash, false); - hash = (hash & 0x0000FFFF) | (length << kMediumLengthShift); - return hash; + // Do the iterative array index computation as long as there is a + // chance this is an array index. + while (buffer->has_more() && hasher.is_array_index()) + hasher.AddCharacter(buffer->GetNext()); + + // Process the remaining characters without updating the array + // index. + while (buffer->has_more()) + hasher.AddCharacterNoIndex(buffer->GetNext()); + + return hasher.GetHashField(); } @@ -5630,6 +5659,20 @@ Object* SymbolTable::LookupString(String* string, Object** s) { } +bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) { + SymbolKey key(string); + int entry = FindEntry(&key); + if (entry == -1) { + return false; + } else { + String* result = String::cast(KeyAt(entry)); + ASSERT(result->is_symbol()); + *symbol = result; + return true; + } +} + + Object* SymbolTable::LookupSymbol(Vector str, Object** s) { Utf8SymbolKey key(str); return LookupKey(&key, s); diff --git a/src/objects.h b/src/objects.h index d718470ba..b1aa55007 100644 --- a/src/objects.h +++ b/src/objects.h @@ -1815,6 +1815,11 @@ class SymbolTable: public HashTable<0, 1> { Object* LookupSymbol(Vector str, Object** s); Object* LookupString(String* key, Object** s); + // Looks up a symbol that is equal to the given string and returns + // true if it is found, assigning the symbol to the given output + // parameter. + bool LookupSymbolIfExists(String* str, String** symbol); + // Casting. static inline SymbolTable* cast(Object* obj); @@ -2839,6 +2844,52 @@ enum AllowNullsFlag {ALLOW_NULLS, DISALLOW_NULLS}; enum RobustnessFlag {ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL}; +class StringHasher { + public: + inline StringHasher(int length); + + // Returns true if the hash of this string can be computed without + // looking at the contents. + inline bool has_trivial_hash(); + + // Add a character to the hash and update the array index calculation. + inline void AddCharacter(uc32 c); + + // Adds a character to the hash but does not update the array index + // calculation. This can only be called when it has been verified + // that the input is not an array index. + inline void AddCharacterNoIndex(uc32 c); + + // Returns the value to store in the hash field of a string with + // the given length and contents. + uint32_t GetHashField(); + + // Returns true if the characters seen so far make up a legal array + // index. + bool is_array_index() { return is_array_index_; } + + bool is_valid() { return is_valid_; } + + void invalidate() { is_valid_ = false; } + + private: + + uint32_t array_index() { + ASSERT(is_array_index()); + return array_index_; + } + + inline uint32_t GetHash(); + + int length_; + uint32_t raw_running_hash_; + uint32_t array_index_; + bool is_array_index_; + bool is_first_char_; + bool is_valid_; +}; + + // The String abstract class captures JavaScript string values: // // Ecma-262: @@ -2980,6 +3031,8 @@ class String: public HeapObject { static const int kMaxShortStringSize = 255; static const int kMaxMediumStringSize = 65535; + static const int kMaxArrayIndexSize = 10; + // Max ascii char code. static const int kMaxAsciiCharCode = 127; @@ -3024,7 +3077,8 @@ class String: public HeapObject { String* sink, int from, int to, - int sink_offset); + int sink_offset, + StringHasher* hasher); protected: class ReadBlockBuffer { diff --git a/src/runtime.cc b/src/runtime.cc index be1dbcddd..1b9233f09 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2380,21 +2380,24 @@ static Object* Runtime_StringBuilderConcat(Arguments args) { if (object->IsFailure()) return object; String* answer = String::cast(object); + StringHasher hasher(length); for (int i = 0; i < array_length; i++) { Object* element = fixed_array->get(i); if (element->IsSmi()) { int len = Smi::cast(element)->value(); int pos = len >> 11; len &= 0x7ff; - String::Flatten(special, answer, pos, pos + len, position); + String::Flatten(special, answer, pos, pos + len, position, &hasher); position += len; } else { String* string = String::cast(element); int element_length = string->length(); - String::Flatten(string, answer, 0, element_length, position); + String::Flatten(string, answer, 0, element_length, position, &hasher); position += element_length; } } + if (hasher.is_valid()) + answer->set_length_field(hasher.GetHashField()); return answer; }