make comments not confuse the c++ parser
author: Oswald Buddenhagen <oswald.buddenhagen@digia.com>
Thu, 1 Aug 2013 09:04:48 +0000 (11:04 +0200)
committer: The Qt Project <gerrit-noreply@qt-project.org>
Tue, 6 Aug 2013 07:30:33 +0000 (09:30 +0200)
comments can appear pretty much everywhere. consequently, handling them
as proper tokens is rather tedious (and was of course not done
consistently, leading to numerous bugs).

so take them out of the token stream and handle magic comments "in the
background".

this necessitates that we use match() a bit more sparingly: we must not
call getToken() right after a "tr()-terminating" token, as it might
otherwise erroneously collect subsequent magic comments too early.

Task-number: QTBUG-21876
Change-Id: I3d2168f019dfc06b0778142bcd5eb619b61e6b2d
Reviewed-by: hjk <hjk121@nokiamail.com>
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@digia.com>
src/linguist/lupdate/cpp.cpp

index 096be64..bbac669 100644 (file)
@@ -242,6 +242,9 @@ private:
     uint getChar();
     uint getToken();
     bool getMacroArgs();
+
+    void processComment();
+
     bool match(uint t);
     bool matchString(QString *s);
     bool matchEncoding();
@@ -294,7 +297,7 @@ private:
         Tok_Eof, Tok_class, Tok_friend, Tok_namespace, Tok_using, Tok_return,
         Tok_tr, Tok_trUtf8, Tok_translate, Tok_translateUtf8, Tok_trid,
         Tok_Q_OBJECT, Tok_Q_DECLARE_TR_FUNCTIONS, Tok_Access, Tok_Cancel,
-        Tok_Ident, Tok_Comment, Tok_String, Tok_Arrow, Tok_Colon, Tok_ColonColon,
+        Tok_Ident, Tok_String, Tok_Arrow, Tok_Colon, Tok_ColonColon,
         Tok_Equals, Tok_LeftBracket, Tok_RightBracket,
         Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen, Tok_RightParen, Tok_Comma, Tok_Semicolon,
         Tok_Null, Tok_Integer,
@@ -326,6 +329,14 @@ private:
     // Parser state
     uint yyTok;
 
+    QString context;
+    QString text;
+    QString comment;
+    QString extracomment;
+    QString msgid;
+    QString sourcetext;
+    TranslatorMessage::ExtraData extra;
+
     NamespaceList namespaces;
     QStack<int> namespaceDepths;
     NamespaceList functionContext;
@@ -834,7 +845,7 @@ uint CppParser::getToken()
                         *ptr++ = yyCh;
                     } while (yyCh != '\n');
                     yyWord.resize(ptr - (ushort *)yyWord.unicode());
-                    return Tok_Comment;
+                    processComment();
                 } else if (yyCh == '*') {
                     bool metAster = false;
                     ushort *ptr = (ushort *)yyWord.unicode();
@@ -855,9 +866,9 @@ uint CppParser::getToken()
                             metAster = false;
                     }
                     yyWord.resize(ptr - (ushort *)yyWord.unicode() - 2);
+                    processComment();
 
                     yyCh = getChar();
-                    return Tok_Comment;
                 }
                 break;
             case '"': {
@@ -1456,8 +1467,6 @@ bool CppParser::matchString(QString *s)
     bool matches = false;
     s->clear();
     forever {
-        while (yyTok == Tok_Comment)
-            yyTok = getToken();
         if (yyTok != Tok_String)
             return matches;
         matches = true;
@@ -1612,13 +1621,6 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
 {
     static QString strColons(QLatin1String("::"));
 
-    QString context;
-    QString text;
-    QString comment;
-    QString extracomment;
-    QString msgid;
-    QString sourcetext;
-    TranslatorMessage::ExtraData extra;
     QString prefix;
 #ifdef DIAGNOSE_RETRANSLATABILITY
     QString functionName;
@@ -1706,8 +1708,6 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                     fct.setValue(text);
                     yyTok = getToken();
                 }
-                while (yyTok == Tok_Comment)
-                    yyTok = getToken();
                 if (yyTok == Tok_Colon) {
                     // Skip any token until '{' since we might do things wrong if we find
                     // a '::' token here.
@@ -1843,10 +1843,10 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                 comment.clear();
                 bool plural = false;
 
-                if (match(Tok_RightParen)) {
+                if (yyTok == Tok_RightParen) {
                     // no comment
                 } else if (match(Tok_Comma) && matchStringOrNull(&comment)) {   //comment
-                    if (match(Tok_RightParen)) {
+                    if (yyTok == Tok_RightParen) {
                         // ok,
                     } else if (match(Tok_Comma)) {
                         plural = true;
@@ -1866,7 +1866,7 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                         int idx = functionContext.length();
                         if (idx < 2) {
                             yyMsg() << qPrintable(LU::tr("tr() cannot be called without context\n"));
-                            break;
+                            goto case_default;
                         }
                         Namespace *fctx;
                         while (!(fctx = findNamespace(functionContext, idx)->classDef)->hasTrFunctions) {
@@ -1935,6 +1935,7 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
             extracomment.clear();
             msgid.clear();
             extra.clear();
+            yyTok = getToken();
             break;
         case Tok_translateUtf8:
         case Tok_translate:
@@ -1951,14 +1952,14 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
             {
                 comment.clear();
                 bool plural = false;
-                if (!match(Tok_RightParen)) {
+                if (yyTok != Tok_RightParen) {
                     // look for comment
                     if (match(Tok_Comma) && matchStringOrNull(&comment)) {
-                        if (!match(Tok_RightParen)) {
+                        if (yyTok != Tok_RightParen) {
                             // look for encoding
                             if (match(Tok_Comma)) {
                                 if (matchEncoding()) {
-                                    if (!match(Tok_RightParen)) {
+                                    if (yyTok != Tok_RightParen) {
                                         // look for the plural quantifier,
                                         // this can be a number, an identifier or
                                         // a function call,
@@ -1970,18 +1971,18 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                                 } else {
                                     // This can be a QTranslator::translate("context",
                                     // "source", "comment", n) plural translation
-                                    if (matchExpression() && match(Tok_RightParen)) {
+                                    if (matchExpression() && yyTok == Tok_RightParen) {
                                         plural = true;
                                     } else {
-                                        break;
+                                        goto case_default;
                                     }
                                 }
                             } else {
-                                break;
+                                goto case_default;
                             }
                         }
                     } else {
-                        break;
+                        goto case_default;
                     }
                 }
                 recordMessage(line, context, text, comment, extracomment, msgid, extra, plural);
@@ -1990,6 +1991,7 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
             extracomment.clear();
             msgid.clear();
             extra.clear();
+            yyTok = getToken();
             break;
         case Tok_trid:
             if (!tor)
@@ -2031,92 +2033,6 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                     prospectiveContext.clear();
             }
             break;
-        case Tok_Comment: {
-            if (!tor)
-                goto case_default;
-            const QChar *ptr = yyWord.unicode();
-            if (*ptr == QLatin1Char(':') && ptr[1].isSpace()) {
-                yyWord.remove(0, 2);
-                extracomment += yyWord;
-                extracomment.detach();
-            } else if (*ptr == QLatin1Char('=') && ptr[1].isSpace()) {
-                yyWord.remove(0, 2);
-                msgid = yyWord.simplified();
-                msgid.detach();
-            } else if (*ptr == QLatin1Char('~') && ptr[1].isSpace()) {
-                yyWord.remove(0, 2);
-                text = yyWord.trimmed();
-                int k = text.indexOf(QLatin1Char(' '));
-                if (k > -1)
-                    extra.insert(text.left(k), text.mid(k + 1).trimmed());
-                text.clear();
-            } else if (*ptr == QLatin1Char('%') && ptr[1].isSpace()) {
-                sourcetext.reserve(sourcetext.length() + yyWord.length() - 2);
-                ushort *ptr = (ushort *)sourcetext.data() + sourcetext.length();
-                int p = 2, c;
-                forever {
-                    if (p >= yyWord.length())
-                        break;
-                    c = yyWord.unicode()[p++].unicode();
-                    if (isspace(c))
-                        continue;
-                    if (c != '"') {
-                        yyMsg() << qPrintable(LU::tr("Unexpected character in meta string\n"));
-                        break;
-                    }
-                    forever {
-                        if (p >= yyWord.length()) {
-                          whoops:
-                            yyMsg() << qPrintable(LU::tr("Unterminated meta string\n"));
-                            break;
-                        }
-                        c = yyWord.unicode()[p++].unicode();
-                        if (c == '"')
-                            break;
-                        if (c == '\\') {
-                            if (p >= yyWord.length())
-                                goto whoops;
-                            c = yyWord.unicode()[p++].unicode();
-                            if (c == '\n')
-                                goto whoops;
-                            *ptr++ = '\\';
-                        }
-                        *ptr++ = c;
-                    }
-                }
-                sourcetext.resize(ptr - (ushort *)sourcetext.data());
-            } else {
-                const ushort *uc = (const ushort *)yyWord.unicode(); // Is zero-terminated
-                int idx = 0;
-                ushort c;
-                while ((c = uc[idx]) == ' ' || c == '\t' || c == '\n')
-                    ++idx;
-                if (!memcmp(uc + idx, MagicComment.unicode(), MagicComment.length() * 2)) {
-                    idx += MagicComment.length();
-                    comment = QString::fromRawData(yyWord.unicode() + idx,
-                                                   yyWord.length() - idx).simplified();
-                    int k = comment.indexOf(QLatin1Char(' '));
-                    if (k == -1) {
-                        context = comment;
-                    } else {
-                        context = comment.left(k);
-                        comment.remove(0, k + 1);
-                        TranslatorMessage msg(
-                                transcode(context), QString(),
-                                transcode(comment), QString(),
-                                yyFileName, yyLineNo, QStringList(),
-                                TranslatorMessage::Finished, false);
-                        msg.setExtraComment(transcode(extracomment.simplified()));
-                        extracomment.clear();
-                        tor->append(msg);
-                        tor->setExtras(extra);
-                        extra.clear();
-                    }
-                }
-            }
-            yyTok = getToken();
-            break;
-        }
         case Tok_Arrow:
             yyTok = getToken();
             if (yyTok == Tok_tr || yyTok == Tok_trUtf8)
@@ -2219,6 +2135,93 @@ void CppParser::parseInternal(ConversionData &cd, const QStringList &includeStac
                                  " (or abuse of the C++ preprocessor)\n"));
 }
 
+void CppParser::processComment() // Interprets the comment text held in yyWord as a possible magic comment and updates parser state.
+{
+    if (!tor) // no tor to record into: the comment is ignored
+        return;
+
+    const QChar *ptr = yyWord.unicode(); // the first two characters select the kind of magic comment
+    if (*ptr == QLatin1Char(':') && ptr[1].isSpace()) { // colon marker: extra comment text
+        yyWord.remove(0, 2); // drop the two-character marker
+        extracomment += yyWord; // accumulates across several such comments
+        extracomment.detach();
+    } else if (*ptr == QLatin1Char('=') && ptr[1].isSpace()) { // equals marker: message id
+        yyWord.remove(0, 2);
+        msgid = yyWord.simplified(); // replaces any previously stored id
+        msgid.detach();
+    } else if (*ptr == QLatin1Char('~') && ptr[1].isSpace()) { // tilde marker: one key/value pair for extra
+        yyWord.remove(0, 2);
+        text = yyWord.trimmed(); // the text member is used as scratch space here
+        int k = text.indexOf(QLatin1Char(' '));
+        if (k > -1) // without a space there is no value, so nothing is stored
+            extra.insert(text.left(k), text.mid(k + 1).trimmed()); // key = first word, value = trimmed remainder
+        text.clear(); // scratch use is over; leave text empty
+    } else if (*ptr == QLatin1Char('%') && ptr[1].isSpace()) { // percent marker: quoted pieces appended to sourcetext
+        sourcetext.reserve(sourcetext.length() + yyWord.length() - 2); // room for the whole comment minus the marker
+        ushort *ptr = (ushort *)sourcetext.data() + sourcetext.length(); // write cursor at the current end of sourcetext (shadows the outer ptr)
+        int p = 2, c; // p: read index into yyWord, starting after the marker
+        forever {
+            if (p >= yyWord.length())
+                break;
+            c = yyWord.unicode()[p++].unicode();
+            if (isspace(c)) // whitespace between quoted pieces is skipped
+                continue;
+            if (c != '"') {
+                yyMsg() << qPrintable(LU::tr("Unexpected character in meta string\n"));
+                break;
+            }
+            forever {
+                if (p >= yyWord.length()) {
+                  whoops:
+                    yyMsg() << qPrintable(LU::tr("Unterminated meta string\n"));
+                    break;
+                }
+                c = yyWord.unicode()[p++].unicode();
+                if (c == '"')
+                    break;
+                if (c == '\\') { // escape sequence: the backslash and the following character are both copied
+                    if (p >= yyWord.length())
+                        goto whoops;
+                    c = yyWord.unicode()[p++].unicode();
+                    if (c == '\n') // a backslash followed by a newline is reported as unterminated
+                        goto whoops;
+                    *ptr++ = '\\';
+                }
+                *ptr++ = c;
+            }
+        }
+        sourcetext.resize(ptr - (ushort *)sourcetext.data()); // shrink to what was actually written
+    } else { // no marker prefix: check whether the comment starts with MagicComment
+        const ushort *uc = (const ushort *)yyWord.unicode(); // Is zero-terminated
+        int idx = 0;
+        ushort c;
+        while ((c = uc[idx]) == ' ' || c == '\t' || c == '\n') // skip leading spaces, tabs and newlines
+            ++idx;
+        if (!memcmp(uc + idx, MagicComment.unicode(), MagicComment.length() * 2)) { // byte count is length * 2 because units are compared as ushort
+            idx += MagicComment.length();
+            comment = QString::fromRawData(yyWord.unicode() + idx,
+                                           yyWord.length() - idx).simplified(); // everything after the keyword, whitespace-normalized
+            int k = comment.indexOf(QLatin1Char(' '));
+            if (k == -1) { // a single word: it only sets the context
+                context = comment;
+            } else { // first word is the context, the rest stays in comment
+                context = comment.left(k);
+                comment.remove(0, k + 1);
+                TranslatorMessage msg( // NOTE(review): argument order presumably (context, source, comment, ...) - confirm against TranslatorMessage
+                        transcode(context), QString(),
+                        transcode(comment), QString(),
+                        yyFileName, yyLineNo, QStringList(),
+                        TranslatorMessage::Finished, false);
+                msg.setExtraComment(transcode(extracomment.simplified()));
+                extracomment.clear(); // consumed by this message
+                tor->append(msg);
+                tor->setExtras(extra); // pending extra pairs go to tor itself, not to msg
+                extra.clear();
+            }
+        }
+    }
+}
+
 const ParseResults *CppParser::recordResults(bool isHeader)
 {
     if (tor) {