Fixed line terminators to handle LS/PS and CR+LF correctly.
authorErik Verbruggen <erik.verbruggen@digia.com>
Mon, 26 Nov 2012 12:35:24 +0000 (13:35 +0100)
committerThe Qt Project <gerrit-noreply@qt-project.org>
Tue, 27 Nov 2012 16:20:29 +0000 (17:20 +0100)
Change-Id: Ie1cd4d662c80630155e922dce38ea01d8ee2504a
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
src/qml/qml/parser/qqmljslexer.cpp
src/qml/qml/parser/qqmljslexer_p.h

index f9cffc8..34e5d61 100644 (file)
@@ -166,10 +166,13 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
 
 void Lexer::scanChar()
 {
+    unsigned sequenceLength = isLineTerminatorSequence();
     _char = *_codePtr++;
+    if (sequenceLength == 2)
+        _char = *_codePtr++;
 
-    if (_char == QLatin1Char('\n')) {
-        _lastLinePtr = _codePtr; // points to the first character after the newline
+    if (unsigned sequenceLength = isLineTerminatorSequence()) {
+        _lastLinePtr = _codePtr + sequenceLength - 1; // points to the first character after the newline
         ++_currentLineNumber;
     }
 }
@@ -287,13 +290,13 @@ again:
     _tokenLinePtr = _lastLinePtr;
 
     while (_char.isSpace()) {
-        if (_char == QLatin1Char('\n')) {
-            _tokenLinePtr = _codePtr;
+        if (unsigned sequenceLength = isLineTerminatorSequence()) {
+            _tokenLinePtr = _codePtr + sequenceLength - 1;
 
             if (_restrictedKeyword) {
                 // automatic semicolon insertion
                 _tokenLine = _currentLineNumber;
-                _tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
+                _tokenStartPtr = _codePtr - 1;
                 return T_SEMICOLON;
             } else {
                 _terminator = true;
@@ -410,7 +413,7 @@ again:
                 }
             }
         } else if (_char == QLatin1Char('/')) {
-            while (!_char.isNull() && _char != QLatin1Char('\n')) {
+            while (!_char.isNull() && !isLineTerminator()) {
                 scanChar();
             }
             if (_engine) {
@@ -553,7 +556,7 @@ again:
 
         if (_engine) {
             while (!_char.isNull()) {
-                if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
+                if (isLineTerminator() || _char == QLatin1Char('\\')) {
                     break;
                 } else if (_char == quote) {
                     _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
@@ -572,9 +575,11 @@ again:
             _tokenText += *startCode++;
 
         while (! _char.isNull()) {
-            if (_char == QLatin1Char('\n')) {
+            if (unsigned sequenceLength = isLineTerminatorSequence()) {
                 multilineStringLiteral = true;
                 _tokenText += _char;
+                if (sequenceLength == 2)
+                    _tokenText += *_codePtr;
                 scanChar();
             } else if (_char == quote) {
                 scanChar();
@@ -637,23 +642,23 @@ again:
                     break;
 
                 case '\r':
-                    while (_char == QLatin1Char('\r'))
-                        scanChar();
-
-                    if (_char == QLatin1Char('\n')) {
-                        u = _char;
-                        scanChar();
-                    } else {
+                    if (isLineTerminatorSequence() == 2) {
+                        _tokenText += QLatin1Char('\r');
                         u = QLatin1Char('\n');
+                    } else {
+                        u = QLatin1Char('\r');
                     }
-
+                    scanChar();
                     break;
 
                 case '\n':
+                case 0x2028u:
+                case 0x2029u:
                     u = _char;
                     scanChar();
                     break;
 
+
                 default:
                     // non escape character
                     u = _char;
@@ -872,11 +877,6 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
 
     while (true) {
         switch (_char.unicode()) {
-        case 0: // eof
-        case '\n': case '\r': // line terminator
-            _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
-            return false;
-
         case '/':
             scanChar();
 
@@ -946,8 +946,13 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
             break;
 
         default:
-            _tokenText += _char;
-            scanChar();
+            if (_char.isNull() || isLineTerminator()) {
+                _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
+                return false;
+            } else {
+                _tokenText += _char;
+                scanChar();
+            }
         } // switch
     } // while
 
@@ -956,7 +961,28 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
 
 bool Lexer::isLineTerminator() const
 {
-    return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
+    const ushort unicode = _char.unicode();
+    return unicode == 0x000Au
+            || unicode == 0x000Du
+            || unicode == 0x2028u
+            || unicode == 0x2029u;
+}
+
+unsigned Lexer::isLineTerminatorSequence() const
+{
+    switch (_char.unicode()) {
+    case 0x000Au:
+    case 0x2028u:
+    case 0x2029u:
+        return 1;
+    case 0x000Du:
+        if (_codePtr->unicode() == 0x000Au)
+            return 2;
+        else
+            return 1;
+    default:
+        return 0;
+    }
 }
 
 bool Lexer::isIdentLetter(QChar ch)
index 3bb3112..c725d2a 100644 (file)
@@ -194,6 +194,7 @@ private:
     int scanNumber(QChar ch);
 
     bool isLineTerminator() const;
+    unsigned isLineTerminatorSequence() const;
     static bool isIdentLetter(QChar c);
     static bool isDecimalDigit(ushort c);
     static bool isHexDigit(QChar c);