Scanner::Scanner(UnicodeCache* unicode_cache)
: unicode_cache_(unicode_cache),
- capturing_raw_literal_(false),
octal_pos_(Location::invalid()),
harmony_scoping_(false),
harmony_modules_(false),
}
+template <bool capture_raw>
uc32 Scanner::ScanHexNumber(int expected_length) {
DCHECK(expected_length <= 4); // prevent overflow
return -1;
}
x = x * 16 + d;
- Advance();
+ Advance<capture_raw>();
}
return x;
}
// Accumulates the run of characters starting at c0_ for which HexValue() is
// non-negative into a single value, advancing past each one.
//
// Returns -1 as soon as the accumulated value exceeds max_value; otherwise
// returns the accumulated value once HexValue(c0_) turns negative (which is 0
// if the very first character already fails that test).
//
// capture_raw is forwarded unchanged to Advance<capture_raw>().
+template <bool capture_raw>
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
uc32 x = 0;
int d = HexValue(c0_);
while (d >= 0) {
x = x * 16 + d;
// The bound is re-checked after every digit, so scanning stops at the first
// digit that pushes the value past max_value.
if (x > max_value) return -1;
- Advance();
+ Advance<capture_raw>();
d = HexValue(c0_);
}
return x;
}
+template <bool capture_raw>
bool Scanner::ScanEscape() {
uc32 c = c0_;
- Advance();
+ Advance<capture_raw>();
// Skip escaped newlines.
if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
// Allow CR+LF newlines in multiline string literals.
- if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
+ if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
// Allow LF+CR newlines in multiline string literals.
- if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
+ if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>();
return true;
}
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
case 'u' : {
- c = ScanUnicodeEscape();
+ c = ScanUnicodeEscape<capture_raw>();
if (c < 0) return false;
break;
}
case 'v' : c = '\v'; break;
case 'x' : {
- c = ScanHexNumber(2);
+ c = ScanHexNumber<capture_raw>(2);
if (c < 0) return false;
break;
}
case '4' : // fall through
case '5' : // fall through
case '6' : // fall through
- case '7' : c = ScanOctalEscape(c, 2); break;
+ case '7':
+ c = ScanOctalEscape<capture_raw>(c, 2);
+ break;
}
// According to ECMA-262, section 7.8.4, characters not covered by the
// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
// ECMA-262. Other JS VMs support them.
+template <bool capture_raw>
uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
uc32 x = c - '0';
int i = 0;
int nx = x * 8 + d;
if (nx >= 256) break;
x = nx;
- Advance();
+ Advance<capture_raw>();
}
// Anything except '\0' is an octal escape sequence, illegal in strict mode.
// Remember the position of octal escape sequences so that an error
uc32 c = c0_;
Advance();
if (c == '\\') {
- if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
+ if (c0_ < 0 || !ScanEscape<false>()) return Token::ILLEGAL;
} else {
AddLiteralChar(c);
}
// followed by an Expression.
Token::Value result = Token::TEMPLATE_SPAN;
- LiteralScope literal(this, true);
+ LiteralScope literal(this);
+ StartRawLiteral();
+ const bool capture_raw = true;
while (true) {
uc32 c = c0_;
- Advance();
+ Advance<capture_raw>();
if (c == '`') {
result = Token::TEMPLATE_TAIL;
ReduceRawLiteralLength(1);
break;
} else if (c == '$' && c0_ == '{') {
- Advance(); // Consume '{'
+ Advance<capture_raw>(); // Consume '{'
ReduceRawLiteralLength(2);
break;
} else if (c == '\\') {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence.
uc32 lastChar = c0_;
- Advance();
+ Advance<capture_raw>();
if (lastChar == '\r') {
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
- Advance(); // Adds \n
+ Advance<capture_raw>(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
}
} else if (c0_ == '0') {
- Advance();
+ Advance<capture_raw>();
AddLiteralChar('0');
} else {
- ScanEscape();
+ ScanEscape<true>();
}
} else if (c < 0) {
// Unterminated template literal
if (c == '\r') {
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
- Advance(); // Adds \n
+ Advance<capture_raw>(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
Advance();
if (c0_ != 'u') return -1;
Advance();
- return ScanUnicodeEscape();
+ return ScanUnicodeEscape<false>();
}
// Scans the part of a Unicode escape that follows "\u" and returns the decoded
// value, or -1 on failure.
//
// Braced form (taken only when c0_ is '{' and HarmonyUnicode() is true):
// consumes '{', reads a hex number capped at 0x10ffff via
// ScanUnlimitedLengthHexNumber, and requires a closing '}' which is also
// consumed. A failed number scan or a missing '}' yields -1.
//
// Otherwise the result of ScanHexNumber<capture_raw>(4) is returned as is.
//
// capture_raw is forwarded to every Advance/Scan* call made here.
+template <bool capture_raw>
uc32 Scanner::ScanUnicodeEscape() {
// Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
// allowed). In the latter case, the number of hex digits between { } is
// arbitrary. \ and u have already been read.
if (c0_ == '{' && HarmonyUnicode()) {
- Advance();
- uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff);
+ Advance<capture_raw>();
+ uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);
if (cp < 0) {
return -1;
}
if (c0_ != '}') {
return -1;
}
- Advance();
+ Advance<capture_raw>();
return cp;
}
- return ScanHexNumber(4);
+ return ScanHexNumber<capture_raw>(4);
}
// if aborting the scanning before it's complete.
class LiteralScope {
public:
- explicit LiteralScope(Scanner* self, bool capture_raw = false)
- : scanner_(self), complete_(false) {
+ explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
scanner_->StartLiteral();
- if (capture_raw) scanner_->StartRawLiteral();
}
~LiteralScope() {
if (!complete_) scanner_->DropLiteral();
}
void Complete() {
- scanner_->TerminateLiteral();
complete_ = true;
}
static const int kCharacterLookaheadBufferSize = 1;
// Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+ template <bool capture_raw>
uc32 ScanOctalEscape(uc32 c, int length);
// Call this after setting source_ to the input.
// Begins collecting a raw literal for the next token: resets
// raw_literal_buffer_ and points next_.raw_literal_chars at it.
// This patch removes the capturing_raw_literal_ flag that used to be set here.
inline void StartRawLiteral() {
raw_literal_buffer_.Reset();
next_.raw_literal_chars = &raw_literal_buffer_;
- capturing_raw_literal_ = true;
}
INLINE(void AddLiteralChar(uc32 c)) {
}
// Appends c to the raw literal buffer of the next token.
// next_.raw_literal_chars must be non-null (enforced by DCHECK_NOT_NULL);
// within the visible code only StartRawLiteral() sets it.
INLINE(void AddRawLiteralChar(uc32 c)) {
- DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->AddChar(c);
}
// Forwards delta to ReduceLength() on the next token's raw literal buffer.
// Presumably this drops the last `delta` characters already captured (callers
// use it to remove a trailing '\r' or delimiter) -- confirm against
// LiteralBuffer::ReduceLength.
// next_.raw_literal_chars must be non-null (enforced by DCHECK_NOT_NULL).
INLINE(void ReduceRawLiteralLength(int delta)) {
- DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->ReduceLength(delta);
}
- // Complete scanning of a literal.
- inline void TerminateLiteral() { capturing_raw_literal_ = false; }
-
// Stops scanning of a literal and drops the collected characters,
// e.g., due to an encountered error.
// Detaches both the cooked and the raw literal buffer from the next token by
// setting next_.literal_chars and next_.raw_literal_chars to NULL.
inline void DropLiteral() {
next_.literal_chars = NULL;
next_.raw_literal_chars = NULL;
- capturing_raw_literal_ = false;
}
inline void AddLiteralCharAdvance() {
}
// Low-level scanning support.
+ template <bool capture_raw = false>
void Advance() {
- if (capturing_raw_literal_) {
+ if (capture_raw) {
AddRawLiteralChar(c0_);
}
c0_ = source_->Advance();
if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
- if (capturing_raw_literal_) ReduceRawLiteralLength(2);
} else {
source_->PushBack(c0_);
- if (capturing_raw_literal_) ReduceRawLiteralLength(1);
}
c0_ = ch;
}
// Literal strings are collected for identifiers, strings, numbers as well
// as for template literals. For template literals we also collect the raw
// form.
- // These functions only give the correct result if the literal
- // was scanned between calls to StartLiteral() and TerminateLiteral().
+ // These functions only give the correct result if the literal was scanned
+ // when a LiteralScope object is alive.
Vector<const uint8_t> literal_one_byte_string() {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->one_byte_literal();
return current_.raw_literal_chars->is_one_byte();
}
-
+ template <bool capture_raw>
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
+ template <bool capture_raw>
uc32 ScanUnlimitedLengthHexNumber(int max_value);
// Scans a single JavaScript token.
// Scans an escape-sequence which is part of a string and adds the
// decoded character to the current literal. Returns true if a pattern
// is scanned.
+ template <bool capture_raw>
bool ScanEscape();
// Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
// Helper for the above functions.
+ template <bool capture_raw>
uc32 ScanUnicodeEscape();
Token::Value ScanTemplateSpan();
// Buffer to store raw string values
LiteralBuffer raw_literal_buffer_;
- // We only need to capture the raw literal when we are scanning template
- // literal spans.
- bool capturing_raw_literal_;
-
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)