From 5c39d9c741a0fb3c60d8be8148a7ec89c3075e6c Mon Sep 17 00:00:00 2001 From: "christian.plesner.hansen@gmail.com" Date: Mon, 1 Dec 2008 15:32:20 +0000 Subject: [PATCH] Added checking in the regular expression parser that we're below the stack limit and that the zone allocation limit hasn't been met. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@879 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/parser.cc | 106 ++++++++++++++++++++++++++------------------- src/top.cc | 8 ++-- src/top.h | 2 + src/zone-inl.h | 5 +++ src/zone.cc | 15 +++++-- src/zone.h | 13 ++++++ test/cctest/test-regexp.cc | 4 ++ 7 files changed, 102 insertions(+), 51 deletions(-) diff --git a/src/parser.cc b/src/parser.cc index 42d4d3f..50412c4 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -495,10 +495,10 @@ class RegExpParser { RegExpParser(FlatStringReader* in, Handle* error, bool multiline_mode); - RegExpTree* ParsePattern(bool* ok); - RegExpTree* ParseDisjunction(bool* ok); - RegExpTree* ParseGroup(bool* ok); - RegExpTree* ParseCharacterClass(bool* ok); + RegExpTree* ParsePattern(); + RegExpTree* ParseDisjunction(); + RegExpTree* ParseGroup(); + RegExpTree* ParseCharacterClass(); // Parses a {...,...} quantifier and stores the range in the given // out parameters. @@ -506,13 +506,13 @@ class RegExpParser { // Parses and returns a single escaped character. The character // must not be 'b' or 'B' since they are usually handle specially. - uc32 ParseClassCharacterEscape(bool* ok); + uc32 ParseClassCharacterEscape(); // Checks whether the following is a length-digit hexadecimal number, // and sets the value if it is. bool ParseHexEscape(int length, uc32* value); - uc32 ParseControlLetterEscape(bool* ok); + uc32 ParseControlLetterEscape(); uc32 ParseOctalLiteral(); // Tries to parse the input as a back reference. If successful it @@ -521,9 +521,8 @@ class RegExpParser { // can be reparsed. bool ParseBackReferenceIndex(int* index_out); - CharacterRange ParseClassAtom(uc16* char_class, - bool* ok); - RegExpTree* ReportError(Vector message, bool* ok); + CharacterRange ParseClassAtom(uc16* char_class); + RegExpTree* ReportError(Vector message); void Advance(); void Advance(int dist); void Reset(int pos); @@ -532,6 +531,7 @@ class RegExpParser { int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int position() { return next_pos_ - 1; } + bool failed() { return failed_; } static const uc32 kEndMarker = (1 << 21); private: @@ -553,6 +553,7 @@ class RegExpParser { bool is_scanned_for_captures_; // The capture count is only valid after we have scanned for captures. int capture_count_; + bool failed_; }; @@ -1027,6 +1028,11 @@ class LexicalScope BASE_EMBEDDED { #define DUMMY ) // to make indentation work #undef DUMMY +#define CHECK_FAILED ); \ + if (failed_) return NULL; \ + ((void)0 +#define DUMMY ) // to make indentation work +#undef DUMMY // ---------------------------------------------------------------------------- // Implementation of Parser @@ -3499,7 +3505,8 @@ RegExpParser::RegExpParser(FlatStringReader* in, has_character_escapes_(false), captures_(NULL), is_scanned_for_captures_(false), - capture_count_(0) { + capture_count_(0), + failed_(false) { Advance(1); } @@ -3515,8 +3522,15 @@ uc32 RegExpParser::Next() { void RegExpParser::Advance() { if (next_pos_ < in()->length()) { - current_ = in()->Get(next_pos_); - next_pos_++; + StackLimitCheck check; + if (check.HasOverflowed()) { + ReportError(CStrVector(Top::kStackOverflowMessage)); + } else if (Zone::excess_allocation()) { + ReportError(CStrVector("Regular expression too large")); + } else { + current_ = in()->Get(next_pos_); + next_pos_++; + } } else { current_ = kEndMarker; has_more_ = false; @@ -3543,19 +3557,22 @@ bool RegExpParser::HasCharacterEscapes() { return has_character_escapes_; } -RegExpTree* RegExpParser::ReportError(Vector message, bool* ok) { - *ok = false; +RegExpTree* RegExpParser::ReportError(Vector message) { + failed_ = true; *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); + // Zip to the end to make sure the no more input is read. + current_ = kEndMarker; + next_pos_ = in()->length(); return NULL; } // Pattern :: // Disjunction -RegExpTree* RegExpParser::ParsePattern(bool* ok) { - RegExpTree* result = ParseDisjunction(CHECK_OK); +RegExpTree* RegExpParser::ParsePattern() { + RegExpTree* result = ParseDisjunction(CHECK_FAILED); if (has_more()) { - ReportError(CStrVector("Unmatched ')'"), CHECK_OK); + ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); } return result; } @@ -3579,7 +3596,7 @@ bool RegExpParser::CaptureAvailable(int index) { // Assertion // Atom // Atom Quantifier -RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { +RegExpTree* RegExpParser::ParseDisjunction() { RegExpBuilder builder; int capture_start_index = captures_started(); while (true) { @@ -3603,7 +3620,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { case '*': case '+': case '?': - ReportError(CStrVector("Nothing to repeat"), CHECK_OK); + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); case '^': { Advance(); RegExpAssertion::Type type = @@ -3630,12 +3647,12 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { break; } case '(': { - RegExpTree* atom = ParseGroup(CHECK_OK); + RegExpTree* atom = ParseGroup(CHECK_FAILED); builder.AddAtom(atom); break; } case '[': { - RegExpTree* atom = ParseCharacterClass(CHECK_OK); + RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); builder.AddAtom(atom); break; } @@ -3644,7 +3661,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { case '\\': switch (Next()) { case kEndMarker: - ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); + ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); case 'b': Advance(2); builder.AddAssertion( @@ -3722,7 +3739,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { break; case 'c': { Advance(2); - uc32 control = ParseControlLetterEscape(ok); + uc32 control = ParseControlLetterEscape(); builder.AddCharacter(control); break; } @@ -3757,7 +3774,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { case '{': { int dummy; if (ParseIntervalQuantifier(&dummy, &dummy)) { - ReportError(CStrVector("Nothing to repeat"), CHECK_OK); + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); } // fallthrough } @@ -3974,9 +3991,9 @@ bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { // Upper and lower case letters differ by one bit. STATIC_CHECK(('a' ^ 'A') == 0x20); -uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { +uc32 RegExpParser::ParseControlLetterEscape() { if (!has_more()) { - ReportError(CStrVector("\\c at end of pattern"), ok); + ReportError(CStrVector("\\c at end of pattern")); return '\0'; } uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. @@ -4030,7 +4047,7 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) { } -uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { +uc32 RegExpParser::ParseClassCharacterEscape() { ASSERT(current() == '\\'); ASSERT(has_next() && !IsSpecialClassEscape(Next())); Advance(); @@ -4056,7 +4073,7 @@ uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { Advance(); return '\v'; case 'c': - return ParseControlLetterEscape(ok); + return ParseControlLetterEscape(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': // For compatibility, we interpret a decimal escape that isn't @@ -4096,7 +4113,7 @@ uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { } -RegExpTree* RegExpParser::ParseGroup(bool* ok) { +RegExpTree* RegExpParser::ParseGroup() { ASSERT_EQ(current(), '('); char type = '('; Advance(); @@ -4107,7 +4124,7 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) { Advance(2); break; default: - ReportError(CStrVector("Invalid group"), CHECK_OK); + ReportError(CStrVector("Invalid group") CHECK_FAILED); break; } } else { @@ -4117,9 +4134,9 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) { captures_->Add(NULL); } int capture_index = captures_started(); - RegExpTree* body = ParseDisjunction(CHECK_OK); + RegExpTree* body = ParseDisjunction(CHECK_FAILED); if (current() != ')') { - ReportError(CStrVector("Unterminated group"), CHECK_OK); + ReportError(CStrVector("Unterminated group") CHECK_FAILED); } Advance(); @@ -4157,7 +4174,7 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) { } -CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) { +CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { ASSERT_EQ(0, *char_class); uc32 first = current(); if (first == '\\') { @@ -4168,7 +4185,7 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) { return CharacterRange::Singleton(0); // Return dummy value. } default: - uc32 c = ParseClassCharacterEscape(CHECK_OK); + uc32 c = ParseClassCharacterEscape(CHECK_FAILED); return CharacterRange::Singleton(c); } } else { @@ -4178,7 +4195,7 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) { } -RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { +RegExpTree* RegExpParser::ParseCharacterClass() { static const char* kUnterminated = "Unterminated character class"; static const char* kRangeOutOfOrder = "Range out of order in character class"; @@ -4192,7 +4209,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { ZoneList* ranges = new ZoneList(2); while (has_more() && current() != ']') { uc16 char_class = 0; - CharacterRange first = ParseClassAtom(&char_class, CHECK_OK); + CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); if (char_class) { CharacterRange::AddClassEscape(char_class, ranges); continue; @@ -4208,7 +4225,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { ranges->Add(CharacterRange::Singleton('-')); break; } - CharacterRange next = ParseClassAtom(&char_class, CHECK_OK); + CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED); if (char_class) { ranges->Add(first); ranges->Add(CharacterRange::Singleton('-')); @@ -4216,7 +4233,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { continue; } if (first.from() > next.to()) { - return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); + return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); } ranges->Add(CharacterRange::Range(first.from(), next.to())); } else { @@ -4224,7 +4241,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { } } if (!has_more()) { - return ReportError(CStrVector(kUnterminated), CHECK_OK); + return ReportError(CStrVector(kUnterminated) CHECK_FAILED); } Advance(); if (ranges->length() == 0) { @@ -4287,21 +4304,20 @@ bool ParseRegExp(FlatStringReader* input, bool multiline, RegExpParseResult* result) { ASSERT(result != NULL); + // Make sure we have a stack guard. + StackGuard guard; RegExpParser parser(input, &result->error, multiline); - bool ok = true; - result->tree = parser.ParsePattern(&ok); - if (!ok) { + result->tree = parser.ParsePattern(); + if (parser.failed()) { ASSERT(result->tree == NULL); ASSERT(!result->error.is_null()); } else { ASSERT(result->tree != NULL); ASSERT(result->error.is_null()); - } - if (ok) { result->has_character_escapes = parser.HasCharacterEscapes(); result->capture_count = parser.captures_started(); } - return ok; + return !parser.failed(); } diff --git a/src/top.cc b/src/top.cc index ace79a9..7e239ec 100644 --- a/src/top.cc +++ b/src/top.cc @@ -603,6 +603,10 @@ bool Top::MayIndexedAccess(JSObject* receiver, } +const char* Top::kStackOverflowMessage = + "Uncaught RangeError: Maximum call stack size exceeded"; + + Failure* Top::StackOverflow() { HandleScope scope; Handle key = Factory::stack_overflow_symbol(); @@ -616,9 +620,7 @@ Failure* Top::StackOverflow() { // doesn't use ReportUncaughtException to determine the location // from where the exception occurred. It should probably be // reworked. - static const char* kMessage = - "Uncaught RangeError: Maximum call stack size exceeded"; - DoThrow(*exception, NULL, kMessage); + DoThrow(*exception, NULL, kStackOverflowMessage); return Failure::Exception(); } diff --git a/src/top.h b/src/top.h index 431f05e..29d46b5 100644 --- a/src/top.h +++ b/src/top.h @@ -271,6 +271,8 @@ class Top { static char* ArchiveThread(char* to); static char* RestoreThread(char* from); + static const char* kStackOverflowMessage; + private: // The context that initiated this JS execution. static ThreadLocalTop thread_local_; diff --git a/src/zone-inl.h b/src/zone-inl.h index 7ed4e6b..6e64c42 100644 --- a/src/zone-inl.h +++ b/src/zone-inl.h @@ -48,6 +48,11 @@ inline void* Zone::New(int size) { } +bool Zone::excess_allocation() { + return segment_bytes_allocated_ > zone_excess_limit_; +} + + } } // namespace v8::internal #endif // V8_ZONE_INL_H_ diff --git a/src/zone.cc b/src/zone.cc index d37f4f7..f4908c5 100644 --- a/src/zone.cc +++ b/src/zone.cc @@ -34,6 +34,8 @@ namespace v8 { namespace internal { Address Zone::position_ = 0; Address Zone::limit_ = 0; +int Zone::zone_excess_limit_ = 256 * MB; +int Zone::segment_bytes_allocated_ = 0; bool AssertNoZoneAllocation::allow_allocation_ = true; @@ -63,6 +65,7 @@ class Segment { // of the segment chain. Returns the new segment. static Segment* New(int size) { Segment* result = reinterpret_cast(Malloced::New(size)); + Zone::segment_bytes_allocated_ += size; if (result != NULL) { result->next_ = head_; result->size_ = size; @@ -72,10 +75,13 @@ class Segment { } // Deletes the given segment. Does not touch the segment chain. - static void Delete(Segment* segment) { + static void Delete(Segment* segment, int size) { + Zone::segment_bytes_allocated_ -= size; Malloced::Delete(segment); } + static int bytes_allocated() { return bytes_allocated_; } + private: // Computes the address of the nth byte in this segment. Address address(int n) const { @@ -83,12 +89,14 @@ class Segment { } static Segment* head_; + static int bytes_allocated_; Segment* next_; int size_; }; Segment* Segment::head_ = NULL; +int Segment::bytes_allocated_ = 0; void Zone::DeleteAll() { @@ -112,11 +120,12 @@ void Zone::DeleteAll() { // Unlink the segment we wish to keep from the list. current->clear_next(); } else { + int size = current->size(); #ifdef DEBUG // Zap the entire current segment (including the header). - memset(current, kZapDeadByte, current->size()); + memset(current, kZapDeadByte, size); #endif - Segment::Delete(current); + Segment::Delete(current, size); } current = next; } diff --git a/src/zone.h b/src/zone.h index c7129ec..9721b07 100644 --- a/src/zone.h +++ b/src/zone.h @@ -61,7 +61,13 @@ class Zone { // Delete all objects and free all memory allocated in the Zone. static void DeleteAll(); + // Returns true if more memory has been allocated in zones than + // the limit allows. + static inline bool excess_allocation(); + private: + friend class Segment; + // All pointers returned from New() have this alignment. static const int kAlignment = kPointerSize; @@ -71,6 +77,13 @@ class Zone { // Never keep segments larger than this size in bytes around. static const int kMaximumKeptSegmentSize = 64 * KB; + // Report zone excess when allocation exceeds this limit. + static int zone_excess_limit_; + + // The number of bytes allocated in segments. Note that this number + // includes memory allocated from the OS but not yet allocated from + // the zone. + static int segment_bytes_allocated_; // The Zone is intentionally a singleton; you should not try to // allocate instances of the class. diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 558003f..8be425e 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -51,6 +51,7 @@ using namespace v8::internal; static SmartPointer Parse(const char* input) { + V8::Initialize(NULL); v8::HandleScope scope; ZoneScope zone_scope(DELETE_ON_EXIT); FlatStringReader reader(CStrVector(input)); @@ -63,6 +64,7 @@ static SmartPointer Parse(const char* input) { } static bool ParseEscapes(const char* input) { + V8::Initialize(NULL); v8::HandleScope scope; unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); ZoneScope zone_scope(DELETE_ON_EXIT); @@ -253,6 +255,7 @@ TEST(ParserRegression) { static void ExpectError(const char* input, const char* expected) { + V8::Initialize(NULL); v8::HandleScope scope; ZoneScope zone_scope(DELETE_ON_EXIT); FlatStringReader reader(CStrVector(input)); @@ -372,6 +375,7 @@ TEST(CharacterClassEscapes) { static RegExpNode* Compile(const char* input, bool multiline) { + V8::Initialize(NULL); FlatStringReader reader(CStrVector(input)); RegExpParseResult result; if (!v8::internal::ParseRegExp(&reader, multiline, &result)) -- 2.7.4