}
+StringHasher::StringHasher(int length)
+ : length_(length),
+ raw_running_hash_(0),
+ array_index_(0),
+ is_array_index_(0 < length_ && length_ <= String::kMaxArrayIndexSize),
+ is_first_char_(true),
+ is_valid_(true) { }
+
+
+bool StringHasher::has_trivial_hash() {
+ return length_ > String::kMaxMediumStringSize;
+}
+
+
+void StringHasher::AddCharacter(uc32 c) {
+ // Note: the Jenkins one-at-a-time hash function
+ raw_running_hash_ += c;
+ raw_running_hash_ += (raw_running_hash_ << 10);
+ raw_running_hash_ ^= (raw_running_hash_ >> 6);
+ // Incremental array index computation
+ if (is_array_index_) {
+ if (c < '0' || c > '9') {
+ is_array_index_ = false;
+ } else {
+ int d = c - '0';
+ if (is_first_char_) {
+ is_first_char_ = false;
+ if (c == '0' && length_ > 1) {
+ is_array_index_ = false;
+ return;
+ }
+ }
+ if (array_index_ > 429496729U - ((d + 2) >> 3)) {
+ is_array_index_ = false;
+ } else {
+ array_index_ = array_index_ * 10 + d;
+ }
+ }
+ }
+}
+
+
+void StringHasher::AddCharacterNoIndex(uc32 c) {
+ ASSERT(!is_array_index());
+ raw_running_hash_ += c;
+ raw_running_hash_ += (raw_running_hash_ << 10);
+ raw_running_hash_ ^= (raw_running_hash_ >> 6);
+}
+
+
+uint32_t StringHasher::GetHash() {
+ uint32_t result = raw_running_hash_;
+ result += (result << 3);
+ result ^= (result >> 11);
+ result += (result << 15);
+ return result;
+}
+
+
bool String::AsArrayIndex(uint32_t* index) {
uint32_t field = length_field();
if ((field & kHashComputedMask) && !(field & kIsArrayIndexMask)) return false;
// cons string is in old space. It can never get GCed until there is
// an old space GC.
PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED : TENURED;
+ int len = length();
Object* object = IsAscii() ?
- Heap::AllocateRawAsciiString(length(), tenure) :
- Heap::AllocateRawTwoByteString(length(), tenure);
+ Heap::AllocateRawAsciiString(len, tenure) :
+ Heap::AllocateRawTwoByteString(len, tenure);
if (object->IsFailure()) return object;
String* result = String::cast(object);
- Flatten(this, result, 0, length(), 0);
+ StringHasher hasher(len);
+ Flatten(this, result, 0, len, 0, &hasher);
+ if (hasher.is_valid()) {
+#ifdef DEBUG
+ result->ComputeAndSetHash();
+ ASSERT(result->length_field() == hasher.GetHashField());
+#else
+ result->set_length_field(hasher.GetHashField());
+#endif
+ Heap::LookupSymbolIfExists(result, &result);
+ }
cs->set_first(result);
cs->set_second(Heap::empty_string());
return this;
}
-void String::Flatten(String* src, String* sink, int f, int t, int so) {
+void String::Flatten(String* src,
+ String* sink,
+ int f,
+ int t,
+ int so,
+ StringHasher* hasher) {
String* source = src;
int from = f;
int to = t;
buffer->Reset(from, source);
int j = sink_offset;
for (int i = from; i < to; i++) {
- sink->Set(j++, buffer->GetNext());
+ uc32 c = buffer->GetNext();
+ if (hasher->is_valid())
+ hasher->AddCharacter(c);
+ sink->Set(j++, c);
}
return;
}
ConsString* cons_string = ConsString::cast(source);
String* first = String::cast(cons_string->first());
int boundary = first->length();
- if (to - boundary > boundary - from) {
+ if (to - boundary >= boundary - from) {
// Right hand side is longer. Recurse over left.
if (from < boundary) {
- Flatten(first, sink, from, boundary, sink_offset);
+ Flatten(first, sink, from, boundary, sink_offset, hasher);
sink_offset += boundary - from;
from = 0;
} else {
to -= boundary;
source = String::cast(cons_string->second());
} else {
- // Left hand side is longer. Recurse over right.
+ // Left hand side is longer. Recurse over right. The hasher
+ // needs us to visit the string from left to right so doing
+ // this invalidates that hash.
+ hasher->invalidate();
if (to > boundary) {
String* second = String::cast(cons_string->second());
Flatten(second,
sink,
0,
to - boundary,
- sink_offset + boundary - from);
+ sink_offset + boundary - from,
+ hasher);
to = boundary;
}
source = first;
}
+uint32_t StringHasher::GetHashField() {
+ ASSERT(is_valid());
+ if (length_ <= String::kMaxShortStringSize) {
+ uint32_t payload;
+ if (is_array_index()) {
+ payload = v8::internal::HashField(array_index(), true);
+ } else {
+ payload = v8::internal::HashField(GetHash(), false);
+ }
+ return (payload & 0x00FFFFFF) | (length_ << String::kShortLengthShift);
+ } else if (length_ <= String::kMaxMediumStringSize) {
+ uint32_t payload = v8::internal::HashField(GetHash(), false);
+ return (payload & 0x0000FFFF) | (length_ << String::kMediumLengthShift);
+ } else {
+ return v8::internal::HashField(length_, false);
+ }
+}
+
+
uint32_t String::ComputeLengthAndHashField(unibrow::CharacterStream* buffer,
int length) {
- // Large string (please note large strings cannot be an array index).
- if (length > kMaxMediumStringSize) return HashField(length, false);
+ StringHasher hasher(length);
- // Note: the Jenkins one-at-a-time hash function
- uint32_t hash = 0;
- while (buffer->has_more()) {
- uc32 r = buffer->GetNext();
- hash += r;
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
-
- // Short string.
- if (length <= kMaxShortStringSize) {
- // Make hash value consistent with value returned from String::Hash.
- buffer->Rewind();
- uint32_t index;
- hash = HashField(hash, ComputeArrayIndex(buffer, &index, length));
- hash = (hash & 0x00FFFFFF) | (length << kShortLengthShift);
- return hash;
- }
+ // Very long strings have a trivial hash that doesn't inspect the
+ // string contents.
+ if (hasher.has_trivial_hash())
+ return hasher.GetHashField();
- // Medium string (please note medium strings cannot be an array index).
- ASSERT(length <= kMaxMediumStringSize);
- // Make hash value consistent with value returned from String::Hash.
- hash = HashField(hash, false);
- hash = (hash & 0x0000FFFF) | (length << kMediumLengthShift);
- return hash;
+ // Do the iterative array index computation as long as there is a
+ // chance this is an array index.
+ while (buffer->has_more() && hasher.is_array_index())
+ hasher.AddCharacter(buffer->GetNext());
+
+ // Process the remaining characters without updating the array
+ // index.
+ while (buffer->has_more())
+ hasher.AddCharacterNoIndex(buffer->GetNext());
+
+ return hasher.GetHashField();
}
}
+bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {
+ SymbolKey key(string);
+ int entry = FindEntry(&key);
+ if (entry == -1) {
+ return false;
+ } else {
+ String* result = String::cast(KeyAt(entry));
+ ASSERT(result->is_symbol());
+ *symbol = result;
+ return true;
+ }
+}
+
+
Object* SymbolTable::LookupSymbol(Vector<const char> str, Object** s) {
Utf8SymbolKey key(str);
return LookupKey(&key, s);
Object* LookupSymbol(Vector<const char> str, Object** s);
Object* LookupString(String* key, Object** s);
+ // Looks up a symbol that is equal to the given string and returns
+ // true if it is found, assigning the symbol to the given output
+ // parameter.
+ bool LookupSymbolIfExists(String* str, String** symbol);
+
// Casting.
static inline SymbolTable* cast(Object* obj);
enum RobustnessFlag {ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL};
+class StringHasher {
+ public:
+ inline StringHasher(int length);
+
+ // Returns true if the hash of this string can be computed without
+ // looking at the contents.
+ inline bool has_trivial_hash();
+
+ // Add a character to the hash and update the array index calculation.
+ inline void AddCharacter(uc32 c);
+
+ // Adds a character to the hash but does not update the array index
+ // calculation. This can only be called when it has been verified
+ // that the input is not an array index.
+ inline void AddCharacterNoIndex(uc32 c);
+
+ // Returns the value to store in the hash field of a string with
+ // the given length and contents.
+ uint32_t GetHashField();
+
+ // Returns true if the characters seen so far make up a legal array
+ // index.
+ bool is_array_index() { return is_array_index_; }
+
+ bool is_valid() { return is_valid_; }
+
+ void invalidate() { is_valid_ = false; }
+
+ private:
+
+ uint32_t array_index() {
+ ASSERT(is_array_index());
+ return array_index_;
+ }
+
+ inline uint32_t GetHash();
+
+ int length_;
+ uint32_t raw_running_hash_;
+ uint32_t array_index_;
+ bool is_array_index_;
+ bool is_first_char_;
+ bool is_valid_;
+};
+
+
// The String abstract class captures JavaScript string values:
//
// Ecma-262:
static const int kMaxShortStringSize = 255;
static const int kMaxMediumStringSize = 65535;
+ static const int kMaxArrayIndexSize = 10;
+
// Max ascii char code.
static const int kMaxAsciiCharCode = 127;
String* sink,
int from,
int to,
- int sink_offset);
+ int sink_offset,
+ StringHasher* hasher);
protected:
class ReadBlockBuffer {
if (object->IsFailure()) return object;
String* answer = String::cast(object);
+ StringHasher hasher(length);
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
- String::Flatten(special, answer, pos, pos + len, position);
+ String::Flatten(special, answer, pos, pos + len, position, &hasher);
position += len;
} else {
String* string = String::cast(element);
int element_length = string->length();
- String::Flatten(string, answer, 0, element_length, position);
+ String::Flatten(string, answer, 0, element_length, position, &hasher);
position += element_length;
}
}
+ if (hasher.is_valid())
+ answer->set_length_field(hasher.GetHashField());
return answer;
}