1 /****************************************************************************
3 ** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
6 ** This file is part of the tools applications of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
40 ****************************************************************************/
42 #include "preprocessor.h"
44 #include <qstringlist.h>
47 #include <qfileinfo.h>
51 #include "ppkeywords.cpp"
52 #include "keywords.cpp"
54 // transform \r\n into \n
55 // \r into \n (os9 style)
56 // backslash-newlines into newlines
// Returns a normalized copy of `input` for the tokenizer: line endings are
// unified to '\n', backslash-newline continuations are folded, and extra
// newlines are emitted so later lines keep their original line numbers.
// The output is written through a raw pointer and never exceeds the
// capacity reserved from input.size(); the final length is set by resize().
57 static QByteArray cleaned(const QByteArray &input)
60 result.reserve(input.size());
61 const char *data = input.constData();
// NOTE(review): the buffer is only reserve()d here, not resize()d; writes go
// through `output` and the size is fixed up at the end — confirm this is
// valid for the QByteArray implementation in use.
62 char *output = result.data();
// skip whitespace at the current position
66 while (*data && is_space(*data))
// takeLine records whether the text at this position starts with '#' ...
68 bool takeLine = (*data == '#');
// ... or with "%:", the alternative token (digraph) for '#'
69 if (*data == '%' && *(data+1) == ':') {
// step over the current character, then any whitespace that follows it
76 do ++data; while (*data && is_space(*data));
79 // handle \\\n, \\\r\n and \\\r
81 if (*(data + 1) == '\r') {
84 if (*data && (*(data + 1) == '\n' || (*data) == '\r')) {
91 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
96 if (ch == '\r') // os9: replace \r with \n
102 // output additional newlines to keep the correct line-numbering
103 // for the lines following the backslash-newline sequence(s)
// shrink the result to the number of bytes actually written
115 result.resize(output - result.constData());
// Global switch read by tokenize(): when true, whitespace is tokenized
// instead of dropped and adjacent string literals are not concatenated.
// Defaults to false.
119 bool Preprocessor::preprocessOnly = false;
// Scans forward through `symbols` from `index` while the current token is
// not PP_ENDIF. The scan never goes past the second-to-last symbol
// (presumably the last one is the eof symbol — verify against tokenize()).
120 void Preprocessor::skipUntilEndif()
122 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
// the action taken depends on the token at the current position
123 switch (symbols.at(index).token) {
// Scans forward through `symbols` from `index` until the current token is
// one of PP_ENDIF, PP_ELIF or PP_ELSE, or until only the last symbol is left.
// Returns true when the scan stopped before the last symbol, false when it
// ran to the end of the symbol list.
137 bool Preprocessor::skipBranch()
139 while (index < symbols.size() - 1
140 && (symbols.at(index).token != PP_ENDIF
141 && symbols.at(index).token != PP_ELIF
142 && symbols.at(index).token != PP_ELSE)
// the action taken depends on the token at the current position
144 switch (symbols.at(index).token) {
156 return (index < symbols.size() - 1);
// Lexer states used by tokenize(). TokenizeCpp and TokenizeDefine select the
// C++ keyword tables; all other values select the preprocessor keyword tables.
// The Prepare* values are transitional: PreparePreprocessorStatement leads to
// TokenizePreprocessorStatement, and PrepareDefine leads to TokenizeDefine.
// TokenizeInclude enables the <...> string scan used for include file names.
160 enum TokenizeMode { TokenizeCpp, TokenizePreprocessor, PreparePreprocessorStatement, TokenizePreprocessorStatement, TokenizeInclude, PrepareDefine, TokenizeDefine };
// Splits `input` into Symbols, with `lineNum` as the starting line number.
// `mode` picks the lexer: TokenizeCpp and TokenizeDefine use the C++ keyword
// tables (keywords / keyword_trans), every other mode uses the preprocessor
// tables (pp_keywords / pp_keyword_trans). The mode changes while scanning as
// directives are met. A default-constructed Symbol is appended last as the
// eof marker.
161 static Symbols tokenize(const QByteArray &input, int lineNum = 1, TokenizeMode mode = TokenizeCpp)
// `begin` stays at the start of the buffer so lexeme offsets can be computed
164 const char *begin = input.constData();
165 const char *data = begin;
// ---- C++ lexer ----
167 if (mode == TokenizeCpp || mode == TokenizeDefine) {
// start of the current lexeme; data - lexem is its length so far
170 const char *lexem = data;
172 Token token = NOTOKEN;
// bytes with the high bit set (non-ASCII) get separate handling
174 if (static_cast<signed char>(*data) < 0) {
// keyword table step: take defnext when the character equals the state's
// defchar, otherwise look the character up in the transition table
178 int nextindex = keywords[state].next;
180 if (*data == keywords[state].defchar)
181 next = keywords[state].defnext;
182 else if (!state || nextindex)
183 next = keyword_trans[nextindex][(int)*data];
187 token = keywords[state].token;
191 // suboptimal, is_ident_char should use a table
// a keyword prefix followed by more identifier characters is an identifier
192 if (keywords[state].ident && is_ident_char(*data))
193 token = keywords[state].ident;
195 if (token == NOTOKEN) {
// tokens ordered after SPECIAL_TREATMENT_MARK need extra scanning
203 if (token > SPECIAL_TREATMENT_MARK) {
// string literal: skipQuote() returns the position after the string
206 data = skipQuote(data);
207 token = STRING_LITERAL;
208 // concatenate multi-line strings for easier
209 // STRING_LITERAL handling in moc
210 if (!Preprocessor::preprocessOnly
211 && !symbols.isEmpty()
212 && symbols.last().token == STRING_LITERAL) {
// merge: previous text without quotes + this lexeme without its first and
// last character, then wrap the result in quotes again
214 QByteArray newString = symbols.last().unquotedLexem();
215 newString += input.mid(lexem - begin + 1, data - lexem - 2);
216 newString.prepend('\"');
217 newString.append('\"');
218 symbols.last() = Symbol(symbols.last().lineNum,
// character literal: scan for the closing quote, looking back for backslashes
225 while (*data && (*data != '\''
227 && *(data-2)!='\\')))
231 token = CHARACTER_LITERAL;
234 // split <:: into two tokens, < and ::
// numeric literal: consume the leading digits
239 while (is_digit_char(*data))
// no '.' after the digits: integer literal
241 if (!*data || *data != '.') {
242 token = INTEGER_LITERAL;
// exactly one character consumed and an x/X follows: hex digits come next
// NOTE(review): the visible lines do not check that the first digit is '0' — confirm
243 if (data - lexem == 1 &&
244 (*data == 'x' || *data == 'X')
247 while (is_hex_char(*data))
252 token = FLOATING_LITERAL;
// floating literal: fraction digits, sign, exponent, then an f/F/l/L suffix
255 case FLOATING_LITERAL:
256 while (is_digit_char(*data))
// NOTE(review): the sign test comes before the e/E test in the visible order,
// which looks reversed for input such as "1e+5" — confirm intended
258 if (*data == '+' || *data == '-')
260 if (*data == 'e' || *data == 'E') {
262 while (is_digit_char(*data))
265 if (*data == 'f' || *data == 'F'
266 || *data == 'l' || *data == 'L')
// hand over to the preprocessor lexer: skip blanks, and if an identifier
// character follows, tokenize it as a preprocessor statement
271 mode = PreparePreprocessorStatement;
272 while (*data && (*data == ' ' || *data == '\t'))
274 if (is_ident_char(*data))
275 mode = TokenizePreprocessorStatement;
281 if (mode == TokenizeDefine) {
283 // emit the newline token
// remember the position, skip blanks and check whether a newline follows
// (presumably `rewind` is used to go back when it does not — TODO confirm)
289 const char *rewind = data;
290 while (*data && (*data == ' ' || *data == '\t'))
292 if (*data && *data == '\n') {
// consume the rest of an identifier
299 while (is_ident_char(*data))
// comment body: advance until the two preceding characters are "*/"
314 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
319 token = WHITESPACE; // one comment, one whitespace
// run of blanks
324 while (*data && (*data == ' ' || *data == '\t'))
326 if (Preprocessor::preprocessOnly) // tokenize whitespace
// skip to the end of the line
330 while (*data && *data != '\n')
332 continue; // ignore safely, the newline is a separator
// with USE_LEXEM_STORE, tokens other than identifiers and literals are stored
// without lexeme text (unless preprocessOnly is set)
337 #ifdef USE_LEXEM_STORE
338 if (!Preprocessor::preprocessOnly
339 && token != IDENTIFIER
340 && token != STRING_LITERAL
341 && token != FLOATING_LITERAL
342 && token != INTEGER_LITERAL)
343 symbols += Symbol(lineNum, token);
// otherwise the symbol records the lexeme as offset + length into `input`
346 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
// ---- preprocessor lexer ----
348 } else { // Preprocessor
350 const char *lexem = data;
352 Token token = NOTOKEN;
// a directive name is looked up as if '#' had already been read
353 if (mode == TokenizePreprocessorStatement) {
354 state = pp_keyword_trans[0][(int)'#'];
355 mode = TokenizePreprocessor;
// bytes with the high bit set (non-ASCII) get separate handling
358 if (static_cast<signed char>(*data) < 0) {
// keyword table step, same scheme as in the C++ lexer above
363 int nextindex = pp_keywords[state].next;
365 if (*data == pp_keywords[state].defchar)
366 next = pp_keywords[state].defnext;
367 else if (!state || nextindex)
368 next = pp_keyword_trans[nextindex][(int)*data];
372 token = pp_keywords[state].token;
375 // suboptimal, is_ident_char should use a table
376 if (pp_keywords[state].ident && is_ident_char(*data))
377 token = pp_keywords[state].ident;
// presumably reached for a define directive: the macro name comes next
384 mode = PrepareDefine;
// emits the pair PP_IF, PP_DEFINED (presumably for #ifdef — TODO confirm)
387 symbols += Symbol(lineNum, PP_IF);
388 symbols += Symbol(lineNum, PP_DEFINED);
// emits PP_IF, PP_NOT, PP_DEFINED (presumably for #ifndef — TODO confirm)
391 symbols += Symbol(lineNum, PP_IF);
392 symbols += Symbol(lineNum, PP_NOT);
393 symbols += Symbol(lineNum, PP_DEFINED);
// enables the <...> file name scan further down
396 mode = TokenizeInclude;
// string literal
399 data = skipQuote(data);
400 token = PP_STRING_LITERAL;
// character literal: scan for the closing quote, looking back for backslashes
403 while (*data && (*data != '\''
405 && *(data-2)!='\\')))
409 token = PP_CHARACTER_LITERAL;
// numeric literal, same scheme as in the C++ lexer above
412 while (is_digit_char(*data))
414 if (!*data || *data != '.') {
415 token = PP_INTEGER_LITERAL;
416 if (data - lexem == 1 &&
417 (*data == 'x' || *data == 'X')
420 while (is_hex_char(*data))
425 token = PP_FLOATING_LITERAL;
428 case PP_FLOATING_LITERAL:
429 while (is_digit_char(*data))
431 if (*data == '+' || *data == '-')
433 if (*data == 'e' || *data == 'E') {
435 while (is_digit_char(*data))
438 if (*data == 'f' || *data == 'F'
439 || *data == 'l' || *data == 'L')
443 if (mode == PreparePreprocessorStatement) {
444 // rewind entire token to begin
446 mode = TokenizePreprocessorStatement;
// consume the rest of an identifier
449 while (is_ident_char(*data))
451 token = PP_IDENTIFIER;
// macro name of a define: store it, then switch to the C++ lexer for the body
453 if (mode == PrepareDefine) {
454 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
455 // make sure we explicitly add the whitespace here, so we can distinguish
456 // correctly between regular and function macros
458 symbols += Symbol(lineNum, WHITESPACE);
459 mode = TokenizeDefine;
// comment body: advance until the two preceding characters are "*/"
474 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
479 token = PP_WHITESPACE; // one comment, one whitespace
// run of blanks
482 while (*data && (*data == ' ' || *data == '\t'))
484 continue; // the preprocessor needs no whitespace
// skip to the end of the line
486 while (*data && *data != '\n')
488 continue; // ignore safely, the newline is a separator
// remember the position, skip blanks and check whether a newline follows
495 const char *rewind = data;
496 while (*data && (*data == ' ' || *data == '\t'))
498 if (*data && *data == '\n') {
// only in include mode: scan up to and including '>' (or to the end of the
// line) and treat the span as a string literal
505 if (mode != TokenizeInclude)
507 token = PP_STRING_LITERAL;
508 while (*data && *data != '\n' && *(data-1) != '>')
514 if (mode == PreparePreprocessorStatement)
// with USE_LEXEM_STORE, tokens other than identifiers and literals are stored
// without lexeme text
516 #ifdef USE_LEXEM_STORE
517 if (token != PP_IDENTIFIER
518 && token != PP_STRING_LITERAL
519 && token != PP_FLOATING_LITERAL
520 && token != PP_INTEGER_LITERAL)
521 symbols += Symbol(lineNum, token);
// otherwise the symbol records the lexeme as offset + length into `input`
524 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
527 symbols += Symbol(); // eof symbol
// Expands identifier `s` into `preprocessed` using the `macros` table.
// Each symbol of the macro's replacement list gets the line number of `s`.
// Identifiers inside the replacement are expanded recursively; all other
// symbols are appended unchanged.
// `safeset` is passed along to the recursive calls.
531 void Preprocessor::macroExpandIdentifier(const Symbol &s, Symbols &preprocessed, MacroSafeSet safeset)
// identifiers that are not defined as macros take a separate path
534 if (!macros.contains(s)) {
// work on a copy of the replacement list so line numbers can be rewritten
539 Symbols expanded = macros.value(s).symbols;
541 // don't expand macros with arguments for now
// a replacement list that starts with '(' marks a macro with arguments
542 if (expanded.size() && expanded.at(0).token == PP_LPAREN) {
547 for (int i = 0; i < expanded.size(); ++i) {
548 expanded[i].lineNum = s.lineNum;
549 if (expanded.at(i).token == PP_IDENTIFIER)
550 macroExpandIdentifier(expanded.at(i), preprocessed, safeset);
552 preprocessed += expanded.at(i);
// Substitutes the replacement list of `macro` into `substituted`.
// The current symbol stream and read index are saved, the macro's symbols
// are installed as the active stream and processed by
// substituteUntilNewline(), and the saved stream is put back afterwards.
// `safeset` is forwarded to substituteUntilNewline(), which skips macros it
// contains.
557 void Preprocessor::substituteMacro(const MacroName &macro, Symbols &substituted, MacroSafeSet safeset)
// remember the caller's stream and position
559 Symbols saveSymbols = symbols;
560 int saveIndex = index;
// make the macro's replacement list the active symbol stream
562 symbols = macros.value(macro).symbols;
566 substituteUntilNewline(substituted, safeset);
// reinstate the caller's stream
568 symbols = saveSymbols;
// Reads tokens from the active symbol stream and appends their substituted
// form to `substituted`:
//  - an identifier that names a macro not listed in `safeset` is expanded
//    through substituteMacro();
//  - `defined X` / `defined(X)` becomes a single symbol whose token is
//    PP_MOC_TRUE or PP_MOC_FALSE depending on whether X is in `macros`;
//  - a newline symbol and every other symbol are appended as they are.
574 void Preprocessor::substituteUntilNewline(Symbols &substituted, MacroSafeSet safeset)
577 Token token = next();
578 if (token == PP_IDENTIFIER) {
579 MacroName macro = symbol();
// macros listed in safeset are not expanded here
580 if (macros.contains(macro) && !safeset.contains(macro)) {
581 substituteMacro(macro, substituted, safeset);
584 } else if (token == PP_DEFINED) {
// the parenthesis after `defined` is optional; remember whether one was seen
585 bool braces = test(PP_LPAREN);
// reuse the operand symbol and overwrite its token with the lookup result
587 Symbol definedOrNotDefined = symbol();
588 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
589 substituted += definedOrNotDefined;
593 } else if (token == PP_NEWLINE) {
594 substituted += symbol();
597 substituted += symbol();
// Evaluator for preprocessor conditional expressions, built on Parser.
// There is one method per grammar level, from conditional_expression down to
// primary_expression; the *_lookup() methods return bool and are used to
// decide whether parsing can proceed.
602 class PP_Expression : public Parser
// Evaluates the symbol stream from its first symbol. Yields 0 when
// unary_expression_lookup() rejects the start of the stream.
605 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
607 int conditional_expression();
608 int logical_OR_expression();
609 int logical_AND_expression();
610 int inclusive_OR_expression();
611 int exclusive_OR_expression();
612 int AND_expression();
613 int equality_expression();
614 int relational_expression();
615 int shift_expression();
616 int additive_expression();
617 int multiplicative_expression();
618 int unary_expression();
619 bool unary_expression_lookup();
620 int primary_expression();
621 bool primary_expression_lookup();
// Parses `cond ? a : b`. Both alternatives are always parsed so their tokens
// are consumed; a missing ':' part counts as 0. Returns the alternative
// selected by the condition.
624 int PP_Expression::conditional_expression()
626 int value = logical_OR_expression();
627 if (test(PP_QUESTION)) {
628 int alt1 = conditional_expression();
629 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
630 return value ? alt1 : alt2;
// Parses a logical-OR chain. The recursive call is the left operand of `||`,
// so the rest of the chain is always parsed (and its tokens consumed) even
// when `value` is already true.
635 int PP_Expression::logical_OR_expression()
637 int value = logical_AND_expression();
639 return logical_OR_expression() || value;
// Parses a logical-AND chain. The recursive call is the left operand of `&&`,
// so the rest of the chain is always parsed (and its tokens consumed) even
// when `value` is already false.
643 int PP_Expression::logical_AND_expression()
645 int value = inclusive_OR_expression();
647 return logical_AND_expression() && value;
// Parses a bitwise-OR chain: the left operand combined with the recursively
// parsed remainder.
651 int PP_Expression::inclusive_OR_expression()
653 int value = exclusive_OR_expression();
655 return value | inclusive_OR_expression();
// Parses a bitwise-XOR chain: the left operand combined with the recursively
// parsed remainder.
659 int PP_Expression::exclusive_OR_expression()
661 int value = AND_expression();
663 return value ^ exclusive_OR_expression();
// Parses a bitwise-AND chain: the left operand combined with the recursively
// parsed remainder.
667 int PP_Expression::AND_expression()
669 int value = equality_expression();
671 return value & AND_expression();
// Parses `==` / `!=` comparisons. The right-hand side is parsed by a
// recursive call to this same level.
675 int PP_Expression::equality_expression()
677 int value = relational_expression();
680 return value == equality_expression();
682 return value != equality_expression();
// Parses `<`, `>`, `<=` and `>=` comparisons. The right-hand side is parsed
// by a recursive call to this same level.
689 int PP_Expression::relational_expression()
691 int value = shift_expression();
694 return value < relational_expression();
696 return value > relational_expression();
698 return value <= relational_expression();
700 return value >= relational_expression();
// Parses `<<` and `>>`. The shift count is the recursively parsed remainder.
// NOTE(review): no range check on the shift count is visible here — confirm
// whether out-of-range counts can reach this point.
707 int PP_Expression::shift_expression()
709 int value = additive_expression();
712 return value << shift_expression();
714 return value >> shift_expression();
// Parses `+` and `-`. The right operand is the recursively parsed remainder.
// NOTE(review): because of the right recursion, a chain such as `a - b - c`
// appears to group as `a - (b - c)` — confirm this is acceptable.
721 int PP_Expression::additive_expression()
723 int value = multiplicative_expression();
726 return value + additive_expression();
728 return value - additive_expression();
// Parses `*`, `%` and `/`. The right operand is the recursively parsed
// remainder. A zero divisor yields 0 instead of dividing.
735 int PP_Expression::multiplicative_expression()
737 int value = unary_expression();
740 return value * multiplicative_expression();
// modulo, guarded against a zero right operand
743 int remainder = multiplicative_expression();
744 return remainder ? value % remainder : 0;
// division, guarded against a zero right operand
748 int div = multiplicative_expression();
749 return div ? value / div : 0;
// Parses prefix operators recursively. The visible results are the operand
// unchanged (presumably unary plus), arithmetic negation, logical not and
// bitwise complement. Anything else is parsed as a primary expression.
757 int PP_Expression::unary_expression()
761 return unary_expression();
763 return -unary_expression();
765 return !unary_expression();
767 return ~unary_expression();
774 return primary_expression();
// Reports whether a unary expression can start at the current position.
// Anything accepted by primary_expression_lookup() qualifies; the condition
// continues with further alternatives.
778 bool PP_Expression::unary_expression_lookup()
781 return (primary_expression_lookup()
// Parses a parenthesized sub-expression or a single operand.
// An operand is converted with toInt() using base 0, so the base is taken
// from the text's prefix. No `ok` pointer is passed, so a failed conversion
// yields 0 without an error.
789 int PP_Expression::primary_expression()
792 if (test(PP_LPAREN)) {
793 value = conditional_expression();
797 value = lexem().toInt(0, 0);
// Reports whether a primary expression can start with token `t`.
// Identifiers, integer literals and floating literals are accepted; the
// condition continues with further alternatives.
802 bool PP_Expression::primary_expression_lookup()
805 return (t == PP_IDENTIFIER
806 || t == PP_INTEGER_LITERAL
807 || t == PP_FLOATING_LITERAL
// Evaluates the condition of the current directive. The remaining symbols of
// the line are macro-substituted into a fresh PP_Expression, which then
// computes the integer result.
813 int Preprocessor::evaluateCondition()
815 PP_Expression expression;
// hand the include stack to the expression parser
816 expression.currentFilenames = currentFilenames;
// fill the expression's symbol list with the substituted condition
818 substituteUntilNewline(expression.symbols);
820 return expression.value();
// Runs the preprocessor over the active symbol stream of `filename` and
// appends the resulting symbols to `preprocessed`. Includes are resolved and
// preprocessed recursively, macro definitions are recorded in `macros`,
// conditions are evaluated, and identifiers are macro-expanded.
// `filename` is pushed on currentFilenames for the duration of the call.
823 void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
825 currentFilenames.push(filename);
// make room up front for at least as many symbols as the input holds
826 preprocessed.reserve(preprocessed.size() + symbols.size());
828 Token token = next();
// ---- include handling ----
// remember the directive's line; it is used for the MOC_INCLUDE_END symbol
833 int lineNum = symbol().lineNum;
836 if (test(PP_STRING_LITERAL)) {
// a name that starts with a double quote is a "local" include
837 local = lexem().startsWith('\"');
838 include = unquotedLexem();
// first candidate: relative to the directory of the including file
846 fi.setFile(QFileInfo(QString::fromLocal8Bit(filename.constData())).dir(), QString::fromLocal8Bit(include.constData()));
// then each configured include path, stopping at the first existing file
847 for (int j = 0; j < Preprocessor::includes.size() && !fi.exists(); ++j) {
848 const IncludePath &p = Preprocessor::includes.at(j);
849 if (p.isFrameworkPath) {
// framework lookup: "Name/header.h" -> <path>/Name.framework/Headers/header.h
850 const int slashPos = include.indexOf('/');
853 QByteArray frameworkCandidate = include.left(slashPos);
854 frameworkCandidate.append(".framework/Headers/");
855 fi.setFile(QString::fromLocal8Bit(QByteArray(p.path + '/' + frameworkCandidate).constData()), QString::fromLocal8Bit(include.mid(slashPos + 1).constData()));
// plain include path: <path>/<include>
857 fi.setFile(QString::fromLocal8Bit(p.path.constData()), QString::fromLocal8Bit(include.constData()));
859 // try again, maybe there's a file later in the include paths with the same name
// a missing file or a directory is not usable as an include
867 if (!fi.exists() || fi.isDir())
// the canonical path is the key used to detect repeated includes
869 include = fi.canonicalFilePath().toLocal8Bit();
871 if (Preprocessor::preprocessedIncludes.contains(include))
873 Preprocessor::preprocessedIncludes.insert(include);
875 QFile file(QString::fromLocal8Bit(include.constData()));
876 if (!file.open(QFile::ReadOnly))
879 QByteArray input = file.readAll();
// save the current stream and position while the included file is processed
884 Symbols saveSymbols = symbols;
885 int saveIndex = index;
887 // phase 1: get rid of backslash-newlines
888 input = cleaned(input);
890 // phase 2: tokenize for the preprocessor
891 symbols = tokenize(input);
896 // phase 3: preprocess conditions and substitute macros
// the included symbols are bracketed by begin/end markers naming the file
897 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
898 preprocess(include, preprocessed);
899 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
901 symbols = saveSymbols;
// ---- macro definition ----
908 QByteArray name = lexem();
// copy the symbols from `start` up to (not including) index - 1 as the body
912 macro.symbols.reserve(index - start - 1);
913 for (int i = start; i < index - 1; ++i)
914 macro.symbols += symbols.at(i);
915 macros.insert(name, macro);
// presumably the name operand of an undef directive — TODO confirm
920 QByteArray name = lexem();
// ---- identifiers are macro-expanded into the output ----
925 case PP_IDENTIFIER: {
927 macroExpandIdentifier(symbol(), preprocessed);
932 continue; // skip unknown preprocessor statement
// ---- conditional: repeat while the condition evaluates to false ----
936 while (!evaluateCondition()) {
// ---- signals / slots keywords ----
// with QT_NO_KEYWORDS defined they are ordinary identifiers; otherwise they
// map to Q_SIGNALS_TOKEN (for SIGNALS) or Q_SLOTS_TOKEN
957 Symbol sym = symbol();
958 if (macros.contains("QT_NO_KEYWORDS"))
959 sym.token = IDENTIFIER;
961 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
// everything else is copied through unchanged
967 preprocessed += symbol();
970 currentFilenames.pop();
// Convenience overload: wraps the C stream `file` in a QFile opened
// read-only and forwards to the QIODevice overload.
// NOTE(review): the result of open() is not checked on the visible lines —
// confirm how an unreadable stream is handled.
973 Symbols Preprocessor::preprocessed(const QByteArray &filename, FILE *file)
976 qfile.open(file, QFile::ReadOnly);
977 return preprocessed(filename, &qfile);
// Reads the whole of `file` and runs the three preprocessing phases on it:
// cleaning, tokenizing and preprocessing. The two fprintf loops dump the
// symbol list to stderr after tokenizing and after preprocessing.
// `filename` is passed to preprocess() as the name of the file being processed.
980 Symbols Preprocessor::preprocessed(const QByteArray &filename, QIODevice *file)
982 QByteArray input = file->readAll();
986 // phase 1: get rid of backslash-newlines
987 input = cleaned(input);
989 // phase 2: tokenize for the preprocessor
990 symbols = tokenize(input);
// diagnostic dump of the tokenized symbols
993 for (int j = 0; j < symbols.size(); ++j)
994 fprintf(stderr, "line %d: %s(%s)\n",
996 symbols[j].lexem().constData(),
997 tokenTypeName(symbols[j].token));
1000 // phase 3: preprocess conditions and substitute macros
1002 preprocess(filename, result);
// diagnostic dump of the preprocessed symbols
1005 for (int j = 0; j < result.size(); ++j)
1006 fprintf(stderr, "line %d: %s(%s)\n",
1008 result[j].lexem().constData(),
1009 tokenTypeName(result[j].token));
1015 void Preprocessor::until(Token t)
1017 while(hasNext() && next() != t)