From: serya@chromium.org Date: Wed, 31 Mar 2010 10:11:33 +0000 (+0000) Subject: StringToInt rewritten. This version doesn't allocate memory for long decimals and... X-Git-Tag: upstream/4.7.83~22086 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dcd96d1fa0500af5caad1aefbe6dc7bd8cc4caf8;p=platform%2Fupstream%2Fv8.git StringToInt rewritten. This version doesn't allocate memory for long decimals and uses percise rounding if radix 10 or a power of 2 (in other cases rounding error still may occur). Handling special values moved from Runtime_StringParseInt into StringToInt in order to make it consistent with StringToDouble. Review URL: http://codereview.chromium.org/1529004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4329 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/conversions.cc b/src/conversions.cc index 5492225..3eb73e4 100644 --- a/src/conversions.cc +++ b/src/conversions.cc @@ -48,51 +48,6 @@ int HexValue(uc32 c) { return -1; } - -// Provide a common interface to getting a character at a certain -// index from a char* or a String object. -static inline int GetChar(const char* str, int index) { - ASSERT(index >= 0 && index < StrLength(str)); - return str[index]; -} - - -static inline int GetChar(String* str, int index) { - return str->Get(index); -} - - -static inline int GetLength(const char* str) { - return StrLength(str); -} - - -static inline int GetLength(String* str) { - return str->length(); -} - - -static inline const char* GetCString(const char* str, int index) { - return str + index; -} - - -static inline const char* GetCString(String* str, int index) { - int length = str->length(); - char* result = NewArray(length + 1); - for (int i = index; i < length; i++) { - uc16 c = str->Get(i); - if (c <= 127) { - result[i - index] = static_cast(c); - } else { - result[i - index] = 127; // Force number parsing to fail. - } - } - result[length - index] = '\0'; - return result; -} - - namespace { // C++-style iterator adaptor for StringInputBuffer @@ -134,15 +89,6 @@ void StringInputBufferIterator::operator++() { } -static inline void ReleaseCString(const char* original, const char* str) { -} - - -static inline void ReleaseCString(String* original, const char* str) { - DeleteArray(const_cast(str)); -} - - template static bool SubStringEquals(Iterator* current, EndMark end, @@ -168,97 +114,6 @@ extern "C" double gay_strtod(const char* s00, const char** se); // we don't need to preserve all the digits. const int kMaxSignificantDigits = 772; -// Parse an int from a string starting a given index and in a given -// radix. The string can be either a char* or a String*. -template -static int InternalStringToInt(S* s, int i, int radix, double* value) { - int len = GetLength(s); - - // Setup limits for computing the value. - ASSERT(2 <= radix && radix <= 36); - int lim_0 = '0' + (radix < 10 ? radix : 10); - int lim_a = 'a' + (radix - 10); - int lim_A = 'A' + (radix - 10); - - // NOTE: The code for computing the value may seem a bit complex at - // first glance. It is structured to use 32-bit multiply-and-add - // loops as long as possible to avoid loosing precision. - - double v = 0.0; - int j; - for (j = i; j < len;) { - // Parse the longest part of the string starting at index j - // possible while keeping the multiplier, and thus the part - // itself, within 32 bits. - uint32_t part = 0, multiplier = 1; - int k; - for (k = j; k < len; k++) { - int c = GetChar(s, k); - if (c >= '0' && c < lim_0) { - c = c - '0'; - } else if (c >= 'a' && c < lim_a) { - c = c - 'a' + 10; - } else if (c >= 'A' && c < lim_A) { - c = c - 'A' + 10; - } else { - break; - } - - // Update the value of the part as long as the multiplier fits - // in 32 bits. When we can't guarantee that the next iteration - // will not overflow the multiplier, we stop parsing the part - // by leaving the loop. - static const uint32_t kMaximumMultiplier = 0xffffffffU / 36; - uint32_t m = multiplier * radix; - if (m > kMaximumMultiplier) break; - part = part * radix + c; - multiplier = m; - ASSERT(multiplier > part); - } - - // Compute the number of part digits. If no digits were parsed; - // we're done parsing the entire string. - int digits = k - j; - if (digits == 0) break; - - // Update the value and skip the part in the string. - ASSERT(multiplier == - pow(static_cast(radix), static_cast(digits))); - v = v * multiplier + part; - j = k; - } - - // If the resulting value is larger than 2^53 the value does not fit - // in the mantissa of the double and there is a loss of precision. - // When the value is larger than 2^53 the rounding depends on the - // code generation. If the code generator spills the double value - // it uses 64 bits and if it does not it uses 80 bits. - // - // If there is a potential for overflow we resort to strtod for - // radix 10 numbers to get higher precision. For numbers in another - // radix we live with the loss of precision. - static const double kPreciseConversionLimit = 9007199254740992.0; - if (radix == 10 && v > kPreciseConversionLimit) { - const char* cstr = GetCString(s, i); - const char* end; - v = gay_strtod(cstr, &end); - ReleaseCString(s, cstr); - } - - *value = v; - return j; -} - - -int StringToInt(String* str, int index, int radix, double* value) { - return InternalStringToInt(str, index, radix, value); -} - - -int StringToInt(const char* str, int index, int radix, double* value) { - return InternalStringToInt(const_cast(str), index, radix, value); -} - static const double JUNK_STRING_VALUE = OS::nan_value(); @@ -281,18 +136,23 @@ static bool isDigit(int x, int radix) { } +static double SignedZero(bool sign) { + return sign ? -0.0 : 0.0; +} + + // Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. template - static double InternalStringToIntDouble(Iterator current, - EndMark end, - bool sign, - bool allow_trailing_junk) { +static double InternalStringToIntDouble(Iterator current, + EndMark end, + bool sign, + bool allow_trailing_junk) { ASSERT(current != end); // Skip leading 0s. while (*current == '0') { ++current; - if (current == end) return sign ? -0.0 : 0.0; + if (current == end) return SignedZero(sign); } int64_t number = 0; @@ -382,6 +242,183 @@ template } +template +static double InternalStringToInt(Iterator current, EndMark end, int radix) { + const bool allow_trailing_junk = true; + const double empty_string_val = JUNK_STRING_VALUE; + + if (!AdvanceToNonspace(¤t, end)) return empty_string_val; + + bool sign = false; + bool leading_zero = false; + + if (*current == '+') { + // Ignore leading sign; skip following spaces. + ++current; + if (!AdvanceToNonspace(¤t, end)) return JUNK_STRING_VALUE; + } else if (*current == '-') { + ++current; + if (!AdvanceToNonspace(¤t, end)) return JUNK_STRING_VALUE; + sign = true; + } + + if (radix == 0) { + // Radix detection. + if (*current == '0') { + ++current; + if (current == end) return SignedZero(sign); + if (*current == 'x' || *current == 'X') { + radix = 16; + ++current; + if (current == end) return JUNK_STRING_VALUE; + } else { + radix = 8; + leading_zero = true; + } + } else { + radix = 10; + } + } else if (radix == 16) { + if (*current == '0') { + // Allow "0x" prefix. + ++current; + if (current == end) return SignedZero(sign); + if (*current == 'x' || *current == 'X') { + ++current; + if (current == end) return JUNK_STRING_VALUE; + } else { + leading_zero = true; + } + } + } + + if (radix < 2 || radix > 36) return JUNK_STRING_VALUE; + + // Skip leading zeros. + while (*current == '0') { + leading_zero = true; + ++current; + if (current == end) return SignedZero(sign); + } + + if (!leading_zero && !isDigit(*current, radix)) { + return JUNK_STRING_VALUE; + } + + if (IsPowerOf2(radix)) { + switch (radix) { + case 2: + return InternalStringToIntDouble<1>( + current, end, sign, allow_trailing_junk); + case 4: + return InternalStringToIntDouble<2>( + current, end, sign, allow_trailing_junk); + case 8: + return InternalStringToIntDouble<3>( + current, end, sign, allow_trailing_junk); + + case 16: + return InternalStringToIntDouble<4>( + current, end, sign, allow_trailing_junk); + + case 32: + return InternalStringToIntDouble<5>( + current, end, sign, allow_trailing_junk); + default: + UNREACHABLE(); + } + } + + if (radix == 10) { + // Parsing with strtod. + const int kMaxSignificantDigits = 308; // Doubles are less than 1.8e308. + // The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero + // end. + const int kBufferSize = kMaxSignificantDigits + 2; + char buffer[kBufferSize]; + int buffer_pos = 0; + while (*current >= '0' && *current <= '9') { + if (buffer_pos <= kMaxSignificantDigits) { + // If the number has more than kMaxSignificantDigits it will be parsed + // as infinity. + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast(*current); + } + ++current; + if (current == end) break; + } + + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return JUNK_STRING_VALUE; + } + + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = '\0'; + return sign ? -gay_strtod(buffer, NULL) : gay_strtod(buffer, NULL); + } + + // TODO(serya): The following legacy code causes accumulating rounding + // error for number greater than ~2^56. It should be rewritten using long + // arithmetic. + + int lim_0 = '0' + (radix < 10 ? radix : 10); + int lim_a = 'a' + (radix - 10); + int lim_A = 'A' + (radix - 10); + + // NOTE: The code for computing the value may seem a bit complex at + // first glance. It is structured to use 32-bit multiply-and-add + // loops as long as possible to avoid loosing precision. + + double v = 0.0; + bool done = false; + do { + // Parse the longest part of the string starting at index j + // possible while keeping the multiplier, and thus the part + // itself, within 32 bits. + unsigned int part = 0, multiplier = 1; + while (true) { + int d; + if (*current >= '0' && *current < lim_0) { + d = *current - '0'; + } else if (*current >= 'a' && *current < lim_a) { + d = *current - 'a' + 10; + } else if (*current >= 'A' && *current < lim_A) { + d = *current - 'A' + 10; + } else { + done = true; + break; + } + + // Update the value of the part as long as the multiplier fits + // in 32 bits. When we can't guarantee that the next iteration + // will not overflow the multiplier, we stop parsing the part + // by leaving the loop. + const unsigned int kMaximumMultiplier = 0xffffffffU / 36; + uint32_t m = multiplier * radix; + if (m > kMaximumMultiplier) break; + part = part * radix + d; + multiplier = m; + ASSERT(multiplier > part); + + ++current; + if (current == end) { + done = true; + break; + } + } + + // Update the value and skip the part in the string. + v = v * multiplier + part; + } while (!done); + + if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + return JUNK_STRING_VALUE; + } + + return sign ? -v : v; +} + + // Converts a string to a double value. Assumes the Iterator supports // the following operations: // 1. current == end (other ops are not allowed), current != end. @@ -417,7 +454,7 @@ static double InternalStringToDouble(Iterator current, bool nonzero_digit_dropped = false; bool fractional_part = false; - double signed_zero = 0.0; + bool sign = false; if (*current == '+') { // Ignore leading sign; skip following spaces. @@ -427,7 +464,7 @@ static double InternalStringToDouble(Iterator current, buffer[buffer_pos++] = '-'; ++current; if (!AdvanceToNonspace(¤t, end)) return JUNK_STRING_VALUE; - signed_zero = -0.0; + sign = true; } static const char kInfinitySymbol[] = "Infinity"; @@ -447,11 +484,11 @@ static double InternalStringToDouble(Iterator current, bool leading_zero = false; if (*current == '0') { ++current; - if (current == end) return signed_zero; + if (current == end) return SignedZero(sign); leading_zero = true; -// It could be hexadecimal value. + // It could be hexadecimal value. if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) { ++current; if (current == end) return JUNK_STRING_VALUE; // "0x". @@ -466,7 +503,7 @@ static double InternalStringToDouble(Iterator current, // Ignore leading zeros in the integer part. while (*current == '0') { ++current; - if (current == end) return signed_zero; + if (current == end) return SignedZero(sign); } } @@ -508,7 +545,7 @@ static double InternalStringToDouble(Iterator current, // leading zeros (if any). while (*current == '0') { ++current; - if (current == end) return signed_zero; + if (current == end) return SignedZero(sign); exponent--; // Move this 0 into the exponent. } } @@ -635,7 +672,7 @@ static double InternalStringToDouble(Iterator current, ASSERT(exponent == 0); buffer_pos += exp_digits; } else if (!fractional_part && significant_digits <= kMaxDigitsInInt) { - if (significant_digits == 0) return signed_zero; + if (significant_digits == 0) return SignedZero(sign); ASSERT(buffer_pos > 0); int num = 0; int start_pos = (buffer[0] == '-' ? 1 : 0); @@ -672,6 +709,25 @@ double StringToDouble(String* str, int flags, double empty_string_val) { } +double StringToInt(String* str, int radix) { + StringShape shape(str); + if (shape.IsSequentialAscii()) { + const char* begin = SeqAsciiString::cast(str)->GetChars(); + const char* end = begin + str->length(); + return InternalStringToInt(begin, end, radix); + } else if (shape.IsSequentialTwoByte()) { + const uc16* begin = SeqTwoByteString::cast(str)->GetChars(); + const uc16* end = begin + str->length(); + return InternalStringToInt(begin, end, radix); + } else { + StringInputBuffer buffer(str); + return InternalStringToInt(StringInputBufferIterator(&buffer), + StringInputBufferIterator::EndMarker(), + radix); + } +} + + double StringToDouble(const char* str, int flags, double empty_string_val) { const char* end = str + StrLength(str); diff --git a/src/conversions.h b/src/conversions.h index 4aaf0c0..c4ceea6 100644 --- a/src/conversions.h +++ b/src/conversions.h @@ -100,8 +100,7 @@ double StringToDouble(const char* str, int flags, double empty_string_val = 0); double StringToDouble(String* str, int flags, double empty_string_val = 0); // Converts a string into an integer. -int StringToInt(String* str, int index, int radix, double* value); -int StringToInt(const char* str, int index, int radix, double* value); +double StringToInt(String* str, int radix); // Converts a double to a string value according to ECMA-262 9.8.1. // The buffer should be large enough for any floating point number. diff --git a/src/runtime.cc b/src/runtime.cc index 6de0f08..301435f 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -4686,49 +4686,9 @@ static Object* Runtime_StringParseInt(Arguments args) { s->TryFlatten(); - int len = s->length(); - int i; - - // Skip leading white space. - for (i = 0; i < len && Scanner::kIsWhiteSpace.get(s->Get(i)); i++) ; - if (i == len) return Heap::nan_value(); - - // Compute the sign (default to +). - int sign = 1; - if (s->Get(i) == '-') { - sign = -1; - i++; - } else if (s->Get(i) == '+') { - i++; - } - - // Compute the radix if 0. - if (radix == 0) { - radix = 10; - if (i < len && s->Get(i) == '0') { - radix = 8; - if (i + 1 < len) { - int c = s->Get(i + 1); - if (c == 'x' || c == 'X') { - radix = 16; - i += 2; - } - } - } - } else if (radix == 16) { - // Allow 0x or 0X prefix if radix is 16. - if (i + 1 < len && s->Get(i) == '0') { - int c = s->Get(i + 1); - if (c == 'x' || c == 'X') i += 2; - } - } - - RUNTIME_ASSERT(2 <= radix && radix <= 36); - double value; - int end_index = StringToInt(s, i, radix, &value); - if (end_index != i) { - return Heap::NumberFromDouble(sign * value); - } + RUNTIME_ASSERT(radix == 0 || (2 <= radix && radix <= 36)); + double value = StringToInt(s, radix); + return Heap::NumberFromDouble(value); return Heap::nan_value(); } diff --git a/test/mjsunit/parse-int-float.js b/test/mjsunit/parse-int-float.js index ded2f66..b82cb87 100644 --- a/test/mjsunit/parse-int-float.js +++ b/test/mjsunit/parse-int-float.js @@ -42,7 +42,10 @@ assertEquals(0x12, parseInt('0x12')); assertEquals(0x12, parseInt('0x12', 16)); assertEquals(0x12, parseInt('0x12', 16.1)); assertEquals(0x12, parseInt('0x12', NaN)); - +assertTrue(isNaN(parseInt('0x '))); +assertTrue(isNaN(parseInt('0x'))); +assertTrue(isNaN(parseInt('0x ', 16))); +assertTrue(isNaN(parseInt('0x', 16))); assertEquals(12, parseInt('12aaa')); assertEquals(0.1, parseFloat('0.1'));