From 84f2f5ee90b0081f09aa8a646d9f7441e9617d89 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Mon, 29 Nov 2010 13:24:37 +0000 Subject: [PATCH] Preparser extracted into separate files that can be compiled to a library. No scons target yet. Review URL: http://codereview.chromium.org/5295004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5899 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- include/v8-preparser.h | 123 ++++++++++++++++++++++++++++++++++ preparser/preparser-process.cc | 147 ++++++++++++++++------------------------- src/parser.cc | 58 +++++++++------- src/parser.h | 23 ++++++- src/preparser-api.cc | 128 +++++++++++++++++++++++++++++++++++ src/preparser.cc | 106 ++++++++++++++--------------- src/preparser.h | 138 +++++++++++++++++++++++--------------- src/scanner-base.cc | 2 +- src/scanner-base.h | 4 -- src/scanner.cc | 24 +------ src/scanner.h | 2 - test/cctest/test-parsing.cc | 39 ++++++++++- 12 files changed, 539 insertions(+), 255 deletions(-) create mode 100644 include/v8-preparser.h create mode 100644 src/preparser-api.cc diff --git a/include/v8-preparser.h b/include/v8-preparser.h new file mode 100644 index 0000000..68ce502 --- /dev/null +++ b/include/v8-preparser.h @@ -0,0 +1,123 @@ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef PREPARSER_H +#define PREPARSER_H + +#include "v8stdint.h" + +#ifdef _WIN32 + +// Setup for Windows DLL export/import. When building the V8 DLL the +// BUILDING_V8_SHARED needs to be defined. When building a program which uses +// the V8 DLL USING_V8_SHARED needs to be defined. When either building the V8 +// static library or building a program which uses the V8 static library neither +// BUILDING_V8_SHARED nor USING_V8_SHARED should be defined. +#if defined(BUILDING_V8_SHARED) && defined(USING_V8_SHARED) +#error both BUILDING_V8_SHARED and USING_V8_SHARED are set - please check the\ + build configuration to ensure that at most one of these is set +#endif + +#ifdef BUILDING_V8_SHARED +#define V8EXPORT __declspec(dllexport) +#elif USING_V8_SHARED +#define V8EXPORT __declspec(dllimport) +#else +#define V8EXPORT +#endif // BUILDING_V8_SHARED + +#else // _WIN32 + +// Setup for Linux shared library export. 
There is no need to distinguish
+// between building or using the V8 shared library, but we should not
+// export symbols when we are building a static library.
+#if defined(__GNUC__) && (__GNUC__ >= 4) && defined(V8_SHARED)
+#define V8EXPORT __attribute__ ((visibility("default")))
+#else // defined(__GNUC__) && (__GNUC__ >= 4)
+#define V8EXPORT
+#endif // defined(__GNUC__) && (__GNUC__ >= 4)
+
+#endif // _WIN32
+
+
+namespace v8 {
+
+
+class PreParserData {
+ public:
+  PreParserData(size_t size, const uint8_t* data)
+      : data_(data), size_(size) { }
+
+  // Create a PreParserData value for which stack_overflow() reports true.
+  static PreParserData StackOverflow() { return PreParserData(0, NULL); }
+
+  // Whether the pre-parser stopped due to a stack overflow.
+  // If this is the case, size() and data() should not be used.
+  bool stack_overflow() const { return size_ == 0u; }
+
+  // The size of the data in bytes.
+  size_t size() const { return size_; }
+
+  // Pointer to the data.
+  const uint8_t* data() const { return data_; }
+
+ private:
+  const uint8_t* const data_;
+  const size_t size_;
+};
+
+
+// Interface for a stream of Unicode characters.
+class UnicodeInputStream {
+ public:
+  virtual ~UnicodeInputStream();
+
+  // Returns the next Unicode code-point in the input, or a negative value when
+  // there is no more input in the stream.
+  virtual int32_t Next() = 0;
+
+  // Pushes a read character back into the stream, so that it will be the next
+  // to be read by Next(). The character pushed back must be the most
+  // recently read character that hasn't already been pushed back (i.e., if
+  // pushing back more than one character, they must occur in the opposite order
+  // of the one they were read in).
+  virtual void PushBack(int32_t ch) = 0;
+};
+
+
+// Preparse a JavaScript program. The source code is provided as a
+// UnicodeInputStream. The max_stack_size limits the amount of stack
+// space that the preparser is allowed to use.
If the preparser uses +// more stack space than the limit provided, the result's stack_overflow() +// method will return true. Otherwise the result contains preparser +// data that can be used by the V8 parser to speed up parsing. +PreParserData V8EXPORT Preparse(UnicodeInputStream* input, + size_t max_stack_size); + +} // namespace v8. + +#endif // PREPARSER_H diff --git a/preparser/preparser-process.cc b/preparser/preparser-process.cc index 706a225..80e8350 100644 --- a/preparser/preparser-process.cc +++ b/preparser/preparser-process.cc @@ -25,17 +25,11 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include #include #include "../include/v8stdint.h" -#include "globals.h" -#include "checks.h" -#include "allocation.h" -#include "utils.h" -#include "list.h" -#include "smart-pointer.h" -#include "scanner-base.h" -#include "preparse-data.h" -#include "preparser.h" +#include "../include/v8-preparser.h" +#include "unicode-inl.h" enum ResultCode { kSuccess = 0, kErrorReading = 1, kErrorWriting = 2 }; @@ -45,78 +39,66 @@ namespace internal { // THIS FILE IS PROOF-OF-CONCEPT ONLY. // The final goal is a stand-alone preparser library. -// UTF16Buffer based on an UTF-8 string in memory. -class UTF8UTF16Buffer : public UTF16Buffer { + +class UTF8InputStream : public v8::UnicodeInputStream { public: - UTF8UTF16Buffer(uint8_t* buffer, size_t length) - : UTF16Buffer(), - buffer_(buffer), + UTF8InputStream(uint8_t* buffer, size_t length) + : buffer_(buffer), offset_(0), + pos_(0), end_offset_(static_cast(length)) { } - virtual void PushBack(uc32 ch) { + virtual ~UTF8InputStream() { } + + virtual void PushBack(int32_t ch) { // Pushback assumes that the character pushed back is the // one that was most recently read, and jumps back in the // UTF-8 stream by the length of that character's encoding. 
offset_ -= unibrow::Utf8::Length(ch); pos_--; #ifdef DEBUG - int tmp = 0; - ASSERT_EQ(ch, unibrow::Utf8::ValueOf(buffer_ + offset_, - end_offset_ - offset_, - &tmp); + if (static_cast(ch) <= unibrow::Utf8::kMaxOneByteChar) { + if (ch != buffer_[offset_]) { + fprintf(stderr, "Invalid pushback: '%c'.", ch); + exit(1); + } + } else { + unsigned tmp = 0; + if (static_cast(ch) != + unibrow::Utf8::CalculateValue(buffer_ + offset_, + end_offset_ - offset_, + &tmp)) { + fprintf(stderr, "Invalid pushback: 0x%x.", ch); + exit(1); + } + } #endif } - virtual uc32 Advance() { + virtual int32_t Next() { if (offset_ == end_offset_) return -1; uint8_t first_char = buffer_[offset_]; if (first_char <= unibrow::Utf8::kMaxOneByteChar) { pos_++; offset_++; - return static_cast(first_char); + return static_cast(first_char); } unibrow::uchar codepoint = unibrow::Utf8::CalculateValue(buffer_ + offset_, end_offset_ - offset_, &offset_); pos_++; - return static_cast(codepoint); - } - - virtual void SeekForward(int pos) { - while (pos_ < pos) { - uint8_t first_byte = buffer_[offset_++]; - while (first_byte & 0x80u && offset_ < end_offset_) { - offset_++; - first_byte <<= 1; - } - pos_++; - } + return static_cast(codepoint); } private: const uint8_t* buffer_; unsigned offset_; + unsigned pos_; unsigned end_offset_; }; -class StandAloneJavaScriptScanner : public JavaScriptScanner { - public: - void Initialize(UTF16Buffer* source) { - source_ = source; - literal_flags_ = kLiteralString | kLiteralIdentifier; - Init(); - // Skip initial whitespace allowing HTML comment ends just like - // after a newline and scan first token. - has_line_terminator_before_next_ = true; - SkipWhiteSpace(); - Scan(); - } -}; - - // Write a number to dest in network byte order. 
void WriteUInt32(FILE* dest, uint32_t value, bool* ok) { for (int i = 3; i >= 0; i--) { @@ -150,56 +132,55 @@ bool ReadBuffer(FILE* source, void* buffer, size_t length) { } -bool WriteBuffer(FILE* dest, void* buffer, size_t length) { +bool WriteBuffer(FILE* dest, const void* buffer, size_t length) { size_t actually_written = fwrite(buffer, 1, length, dest); return (actually_written == length); } + +template +class ScopedPointer { + public: + explicit ScopedPointer(T* pointer) : pointer_(pointer) {} + ~ScopedPointer() { delete[] pointer_; } + T& operator[](int index) { return pointer_[index]; } + T* operator*() { return pointer_ ;} + private: + T* pointer_; +}; + + // Preparse stdin and output result on stdout. int PreParseIO() { fprintf(stderr, "LOG: Enter parsing loop\n"); bool ok = true; uint32_t length = ReadUInt32(stdin, &ok); if (!ok) return kErrorReading; - SmartPointer buffer(NewArray(length)); + ScopedPointer buffer(new uint8_t[length]); + if (!ReadBuffer(stdin, *buffer, length)) { return kErrorReading; } - UTF8UTF16Buffer input_buffer(*buffer, static_cast(length)); - StandAloneJavaScriptScanner scanner; - scanner.Initialize(&input_buffer); - CompleteParserRecorder recorder; - preparser::PreParser preparser; - - if (!preparser.PreParseProgram(&scanner, &recorder, true)) { - if (scanner.stack_overflow()) { - // Report stack overflow error/no-preparser-data. - WriteUInt32(stdout, 0, &ok); - if (!ok) return kErrorWriting; - return 0; - } + UTF8InputStream input_buffer(*buffer, static_cast(length)); + + v8::PreParserData data = + v8::Preparse(&input_buffer, 64 * sizeof(void*)); // NOLINT + if (data.stack_overflow()) { + // Report stack overflow error/no-preparser-data. 
+ WriteUInt32(stdout, 0, &ok); + if (!ok) return kErrorWriting; + return 0; } - Vector pre_data = recorder.ExtractData(); - uint32_t size = static_cast(pre_data.length() * sizeof(uint32_t)); + uint32_t size = data.size(); WriteUInt32(stdout, size, &ok); if (!ok) return kErrorWriting; - if (!WriteBuffer(stdout, - reinterpret_cast(pre_data.start()), - size)) { + if (!WriteBuffer(stdout, data.data(), size)) { return kErrorWriting; } return 0; } -// Functions declared by allocation.h - -void FatalProcessOutOfMemory(const char* location) { - V8_Fatal("", 0, location); -} - -bool EnableSlowAsserts() { return true; } - } } // namespace v8::internal @@ -211,17 +192,3 @@ int main(int argc, char* argv[]) { fprintf(stderr, "EXIT: Failure %d\n", status); return EXIT_FAILURE; } - - -// Fatal error handling declared by checks.h. - -extern "C" void V8_Fatal(const char* file, int line, const char* format, ...) { - fflush(stdout); - fflush(stderr); - va_list arguments; - va_start(arguments, format); - vfprintf(stderr, format, arguments); - va_end(arguments); - fputs("\n#\n\n", stderr); - exit(EXIT_FAILURE); -} diff --git a/src/parser.cc b/src/parser.cc index 186d102..d147dff 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -593,7 +593,8 @@ Parser::Parser(Handle