From 5e1d926053aef5f97f90b0cb7471aaef9ecce297 Mon Sep 17 00:00:00 2001 From: "dcarney@chromium.org" Date: Wed, 13 Mar 2013 19:43:45 +0000 Subject: [PATCH] Some Utf8Length microoptimizations R=yangguo@chromium.org BUG= Review URL: https://codereview.chromium.org/12783002 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13938 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/api.cc | 71 ++++++++++++++++++++++++++----------------------------- src/unicode-inl.h | 12 ++++++++++ src/unicode.h | 1 + 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/src/api.cc b/src/api.cc index 9441810..faba6de 100644 --- a/src/api.cc +++ b/src/api.cc @@ -4011,31 +4011,30 @@ class Utf8LengthHelper : public i::AllStatic { class Visitor { public: - explicit Visitor() + inline explicit Visitor() : utf8_length_(0), state_(kInitialState) {} - template<typename Char> - inline void Visit(const Char* chars, int length) { + void VisitOneByteString(const uint8_t* chars, int length) { int utf8_length = 0; - int last_character = unibrow::Utf16::kNoPreviousCharacter; + // Add in length 1 for each non-ASCII character. for (int i = 0; i < length; i++) { - uint16_t c = chars[i]; - utf8_length += unibrow::Utf8::Length(c, last_character); - if (sizeof(Char) > 1) { - last_character = c; - } + utf8_length += *chars++ >> 7; } - utf8_length_ = utf8_length; - } - - void VisitOneByteString(const uint8_t* chars, int length) { - Visit(chars, length); + // Add in length 1 for each character. 
+ utf8_length_ = utf8_length + length; state_ = kInitialState; } void VisitTwoByteString(const uint16_t* chars, int length) { - Visit(chars, length); + int utf8_length = 0; + int last_character = unibrow::Utf16::kNoPreviousCharacter; + for (int i = 0; i < length; i++) { + uint16_t c = chars[i]; + utf8_length += unibrow::Utf8::Length(c, last_character); + last_character = c; + } + utf8_length_ = utf8_length; uint8_t state = 0; if (unibrow::Utf16::IsTrailSurrogate(chars[0])) { state |= kStartsWithTrailingSurrogate; @@ -4132,32 +4131,30 @@ class Utf8LengthHelper : public i::AllStatic { if (right_as_cons == NULL) { total_length += leaf_length; MergeLeafRight(&total_length, &state, right_leaf_state); - // Terminal node. - if (left_as_cons == NULL) { - MergeTerminal(&total_length, state, state_out); - return total_length; - } - } else if (left_as_cons != NULL) { - // Both strings are ConsStrings. - // Recurse on smallest. - if (left->length() < right->length()) { - total_length += Calculate(left_as_cons, &left_leaf_state); - MergeLeafLeft(&total_length, &state, left_leaf_state); - current = right_as_cons; - continue; - } else { - total_length += Calculate(right_as_cons, &right_leaf_state); - MergeLeafRight(&total_length, &state, right_leaf_state); + if (left_as_cons != NULL) { + // 1 Leaf node. Descend in place. current = left_as_cons; continue; + } else { + // Terminal node. + MergeTerminal(&total_length, state, state_out); + return total_length; } + } else if (left_as_cons == NULL) { + // 1 Leaf node. Descend in place. + current = right_as_cons; + continue; } - // 1 leaf node. Do in place descent. - if (left_as_cons != NULL) { - current = left_as_cons; - } else { - ASSERT(right_as_cons != NULL); + // Both strings are ConsStrings. + // Recurse on smallest. 
+ if (left->length() < right->length()) { + total_length += Calculate(left_as_cons, &left_leaf_state); + MergeLeafLeft(&total_length, &state, left_leaf_state); current = right_as_cons; + } else { + total_length += Calculate(right_as_cons, &right_leaf_state); + MergeLeafRight(&total_length, &state, right_leaf_state); + current = left_as_cons; } } UNREACHABLE(); @@ -4267,7 +4264,7 @@ class Utf8WriterVisitor { if (sizeof(Char) == 1) { for (; i < fast_length; i++) { buffer += - Utf8::Encode(buffer, *chars++, Utf16::kNoPreviousCharacter); + Utf8::EncodeOneByte(buffer, static_cast<uint8_t>(*chars++)); ASSERT(capacity_ == -1 || (buffer - start_) <= capacity_); } } else { diff --git a/src/unicode-inl.h b/src/unicode-inl.h index c80c67e..02be457 100644 --- a/src/unicode-inl.h +++ b/src/unicode-inl.h @@ -95,6 +95,18 @@ uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) { } +unsigned Utf8::EncodeOneByte(char* str, uint8_t c) { + static const int kMask = ~(1 << 6); + if (c <= kMaxOneByteChar) { + str[0] = c; + return 1; + } + str[0] = 0xC0 | (c >> 6); + str[1] = 0x80 | (c & kMask); + return 2; +} + + unsigned Utf8::Encode(char* str, uchar c, int previous) { static const int kMask = ~(1 << 6); if (c <= kMaxOneByteChar) { diff --git a/src/unicode.h b/src/unicode.h index f8a1f60..3279ad8 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -149,6 +149,7 @@ class Latin1 { class Utf8 { public: static inline uchar Length(uchar chr, int previous); + static inline unsigned EncodeOneByte(char* out, uint8_t c); static inline unsigned Encode( char* out, uchar c, int previous); static uchar CalculateValue(const byte* str, -- 2.7.4