1 /****************************************************************************
3 ** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
6 ** This file is part of the tools applications of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
40 ****************************************************************************/
42 #include "preprocessor.h"
44 #include <qstringlist.h>
47 #include <qfileinfo.h>
51 #include "ppkeywords.cpp"
52 #include "keywords.cpp"
54 // transform \r\n into \n
55 // \r into \n (os9 style)
56 // backslash-newlines into newlines
//
// Returns a normalized copy of `input`; callers in this file run it as
// "phase 1" before tokenize(). The result is written through a raw pointer
// into a buffer reserved to input.size(), so the code relies on the cleaned
// text never being longer than the input.
57 static QByteArray cleaned(const QByteArray &input)
60 result.reserve(input.size());
61 const char *data = input.constData();
62 char *output = result.data();
// Leading whitespace of a line is scanned before looking at its first
// significant character.
66 while (*data && is_space(*data))
// takeLine records that the first non-space character of the line is '#'.
68 bool takeLine = (*data == '#');
// "%:" is the alternative-token (digraph) spelling of '#'.
69 if (*data == '%' && *(data+1) == ':') {
76 do ++data; while (*data && is_space(*data));
79 // handle \\\n, \\\r\n and \\\r
81 if (*(data + 1) == '\r') {
84 if (*data && (*(data + 1) == '\n' || (*data) == '\r')) {
91 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
96 if (ch == '\r') // os9: replace \r with \n
102 // output additional newlines to keep the correct line-numbering
103 // for the lines following the backslash-newline sequence(s)
// Shrink the result to the number of bytes actually written via `output`.
115 result.resize(output - result.constData());
// Definition of the static flag, off by default. tokenize() consults it:
// when set, adjacent string literals are not concatenated and whitespace
// is tokenized instead of dropped.
119 bool Preprocessor::preprocessOnly = false;
// Advances `index` through `symbols` until the symbol at `index` is
// PP_ENDIF. The loop is bounded by size() - 1, so it never moves past the
// final symbol even if no PP_ENDIF is found.
120 void Preprocessor::skipUntilEndif()
122 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
// Dispatch on the current token while skipping.
123 switch (symbols.at(index).token) {
// Skips forward until the current symbol is PP_ENDIF, PP_ELIF or PP_ELSE,
// or until the last symbol is reached.
//
// Returns true if the scan stopped before the final symbol (i.e. one of the
// three terminating tokens was found), false if it ran to the end.
137 bool Preprocessor::skipBranch()
139 while (index < symbols.size() - 1
140 && (symbols.at(index).token != PP_ENDIF
141 && symbols.at(index).token != PP_ELIF
142 && symbols.at(index).token != PP_ELSE)
144 switch (symbols.at(index).token) {
156 return (index < symbols.size() - 1);
// Scanner modes for tokenize(). TokenizeCpp selects the C++ keyword tables;
// every other value takes the preprocessor branch. tokenize() itself switches
// between the Prepare/TokenizePreprocessorStatement and TokenizeInclude
// modes while scanning.
160 enum TokenizeMode { TokenizeCpp, TokenizePreprocessor, PreparePreprocessorStatement, TokenizePreprocessorStatement, TokenizeInclude };
// Splits `input` into a list of Symbols.
//
// input   - text to scan (callers in this file pass the output of cleaned()).
// lineNum - line number stored in the emitted symbols; defaults to 1.
// mode    - TokenizeCpp uses the `keywords` / `keyword_trans` tables, any
//           other mode uses `pp_keywords` / `pp_keyword_trans`.
//
// A default-constructed Symbol is appended at the end as an EOF marker.
161 static Symbols tokenize(const QByteArray &input, int lineNum = 1, TokenizeMode mode = TokenizeCpp)
164 const char *begin = input.constData();
165 const char *data = begin;
167 if (mode == TokenizeCpp) {
// `lexem` marks the start of the token currently being scanned.
170 const char *lexem = data;
172 Token token = NOTOKEN;
// Bytes with the high bit set are negative as signed char; presumably they
// are handled separately because the transition table below is indexed by
// the character value.
174 if (static_cast<signed char>(*data) < 0) {
// Table-driven keyword recognition: follow the default transition when the
// character matches defchar, otherwise look up keyword_trans.
178 int nextindex = keywords[state].next;
180 if (*data == keywords[state].defchar)
181 next = keywords[state].defnext;
182 else if (!state || nextindex)
183 next = keyword_trans[nextindex][(int)*data];
187 token = keywords[state].token;
191 // suboptimal, is_ident_char should use a table
// If the keyword is followed by another identifier character, use the
// state's `ident` token instead of the keyword token.
192 if (keywords[state].ident && is_ident_char(*data))
193 token = keywords[state].ident;
195 if (token == NOTOKEN) {
// Tokens above SPECIAL_TREATMENT_MARK need extra scanning (string,
// character and numeric literals, comments, whitespace, ...).
203 if (token > SPECIAL_TREATMENT_MARK) {
206 data = skipQuote(data);
207 token = STRING_LITERAL;
208 // concatenate multi-line strings for easier
209 // STRING_LITERAL handling in moc
210 if (!Preprocessor::preprocessOnly
211 && !symbols.isEmpty()
212 && symbols.last().token == STRING_LITERAL) {
// Merge with the previous literal: take its unquoted text, append the new
// literal without its surrounding quotes (+1 / -2), then re-quote.
214 QByteArray newString = symbols.last().unquotedLexem();
215 newString += input.mid(lexem - begin + 1, data - lexem - 2);
216 newString.prepend('\"');
217 newString.append('\"');
218 symbols.last() = Symbol(symbols.last().lineNum,
225 while (*data && (*data != '\''
227 && *(data-2)!='\\')))
231 token = CHARACTER_LITERAL;
234 // split <:: into two tokens, < and ::
// Numeric literal: consume leading digits; without a following '.' it is
// an integer (with a hex form when the second character is x/X), otherwise
// a floating literal.
239 while (is_digit_char(*data))
241 if (!*data || *data != '.') {
242 token = INTEGER_LITERAL;
243 if (data - lexem == 1 &&
244 (*data == 'x' || *data == 'X')
247 while (is_hex_char(*data))
252 token = FLOATING_LITERAL;
255 case FLOATING_LITERAL:
256 while (is_digit_char(*data))
258 if (*data == '+' || *data == '-')
260 if (*data == 'e' || *data == 'E') {
262 while (is_digit_char(*data))
// Optional f/F/l/L suffix.
265 if (*data == 'f' || *data == 'F'
266 || *data == 'l' || *data == 'L')
// Switch towards preprocessor-statement scanning; only when an identifier
// character follows the optional blanks is the statement mode entered.
271 mode = PreparePreprocessorStatement;
272 while (*data && (*data == ' ' || *data == '\t'))
274 if (is_ident_char(*data))
275 mode = TokenizePreprocessorStatement;
// Remember the position so the scan can be undone after looking ahead
// past blanks for a newline.
284 const char *rewind = data;
285 while (*data && (*data == ' ' || *data == '\t'))
287 if (*data && *data == '\n') {
294 while (is_ident_char(*data))
// C-style comment: advance until the two preceding characters are "*/".
309 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
314 token = WHITESPACE; // one comment, one whitespace
319 while (*data && (*data == ' ' || *data == '\t'))
321 if (Preprocessor::preprocessOnly) // tokenize whitespace
// Line comment: skip to the end of the line.
325 while (*data && *data != '\n')
327 continue; // ignore safely, the newline is a separator
// With USE_LEXEM_STORE, tokens other than identifiers and literals are
// stored without a lexem; the other form records the offset and length
// of the lexem within `input`.
332 #ifdef USE_LEXEM_STORE
333 if (!Preprocessor::preprocessOnly
334 && token != IDENTIFIER
335 && token != STRING_LITERAL
336 && token != FLOATING_LITERAL
337 && token != INTEGER_LITERAL)
338 symbols += Symbol(lineNum, token);
341 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
343 } else { // Preprocessor
345 const char *lexem = data;
347 Token token = NOTOKEN;
// At the start of a preprocessor statement, seed the state machine as if
// a '#' had already been consumed, then continue in normal preprocessor mode.
348 if (mode == TokenizePreprocessorStatement) {
349 state = pp_keyword_trans[0][(int)'#'];
350 mode = TokenizePreprocessor;
353 if (static_cast<signed char>(*data) < 0) {
// Same table-driven recognition as the C++ branch, using the pp_ tables.
358 int nextindex = pp_keywords[state].next;
360 if (*data == pp_keywords[state].defchar)
361 next = pp_keywords[state].defnext;
362 else if (!state || nextindex)
363 next = pp_keyword_trans[nextindex][(int)*data];
367 token = pp_keywords[state].token;
370 // suboptimal, is_ident_char should use a table
371 if (pp_keywords[state].ident && is_ident_char(*data))
372 token = pp_keywords[state].ident;
// Emits the pair PP_IF, PP_DEFINED (presumably the expansion of #ifdef).
379 symbols += Symbol(lineNum, PP_IF);
380 symbols += Symbol(lineNum, PP_DEFINED);
// Emits PP_IF, PP_NOT, PP_DEFINED (presumably the expansion of #ifndef).
383 symbols += Symbol(lineNum, PP_IF);
384 symbols += Symbol(lineNum, PP_NOT);
385 symbols += Symbol(lineNum, PP_DEFINED);
388 mode = TokenizeInclude;
391 data = skipQuote(data);
392 token = PP_STRING_LITERAL;
395 while (*data && (*data != '\''
397 && *(data-2)!='\\')))
401 token = PP_CHARACTER_LITERAL;
// Numeric literal handling mirrors the C++ branch with PP_ token values.
404 while (is_digit_char(*data))
406 if (!*data || *data != '.') {
407 token = PP_INTEGER_LITERAL;
408 if (data - lexem == 1 &&
409 (*data == 'x' || *data == 'X')
412 while (is_hex_char(*data))
417 token = PP_FLOATING_LITERAL;
420 case PP_FLOATING_LITERAL:
421 while (is_digit_char(*data))
423 if (*data == '+' || *data == '-')
425 if (*data == 'e' || *data == 'E') {
427 while (is_digit_char(*data))
430 if (*data == 'f' || *data == 'F'
431 || *data == 'l' || *data == 'L')
435 if (mode == PreparePreprocessorStatement) {
436 // rewind entire token to begin
438 mode = TokenizePreprocessorStatement;
441 while (is_ident_char(*data))
443 token = PP_IDENTIFIER;
// C-style comment: advance until the two preceding characters are "*/".
456 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
461 token = PP_WHITESPACE; // one comment, one whitespace
464 while (*data && (*data == ' ' || *data == '\t'))
466 continue; // the preprocessor needs no whitespace
468 while (*data && *data != '\n')
470 continue; // ignore safely, the newline is a separator
477 const char *rewind = data;
478 while (*data && (*data == ' ' || *data == '\t'))
480 if (*data && *data == '\n') {
// Outside TokenizeInclude this token is not treated as an include target;
// in include mode the text up to the closing '>' (or end of line) becomes
// a PP_STRING_LITERAL.
487 if (mode != TokenizeInclude)
489 token = PP_STRING_LITERAL;
490 while (*data && *data != '\n' && *(data-1) != '>')
496 if (mode == PreparePreprocessorStatement)
498 #ifdef USE_LEXEM_STORE
499 if (token != PP_IDENTIFIER
500 && token != PP_STRING_LITERAL
501 && token != PP_FLOATING_LITERAL
502 && token != PP_INTEGER_LITERAL)
503 symbols += Symbol(lineNum, token);
506 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
509 symbols += Symbol(); // eof symbol
// Expands the macro named `macro` into `substituted`.
//
// macro       - name of the macro to expand; looked up in `macros`.
// substituted - output list that receives the expanded symbols.
// safeset     - set of macro names forwarded to substituteUntilNewline(),
//               which skips expansion of any name contained in it.
//
// The parser's own `symbols` are temporarily replaced by the macro's stored
// symbols so that substituteUntilNewline() walks the macro body, and are
// put back afterwards.
513 void Preprocessor::substituteMacro(const MacroName &macro, Symbols &substituted, MacroSafeSet safeset)
// Save the current parse position (symbol list and index).
515 Symbols saveSymbols = symbols;
516 int saveIndex = index;
// Parse the macro body instead of the surrounding input.
518 symbols = macros.value(macro).symbols;
522 substituteUntilNewline(substituted, safeset);
// Restore the caller's symbol list.
524 symbols = saveSymbols;
// Copies symbols into `substituted`, expanding macros on the way.
//
// substituted - output list of symbols.
// safeset     - macro names that must not be expanded; an identifier is
//               only expanded when it is a known macro and not in this set.
530 void Preprocessor::substituteUntilNewline(Symbols &substituted, MacroSafeSet safeset)
533 Token token = next();
534 if (token == PP_IDENTIFIER) {
535 MacroName macro = symbol();
536 if (macros.contains(macro) && !safeset.contains(macro)) {
537 substituteMacro(macro, substituted, safeset);
540 } else if (token == PP_DEFINED) {
// `defined X` and `defined(X)` are both accepted; `braces` remembers
// whether an opening parenthesis was consumed.
541 bool braces = test(PP_LPAREN);
// Replace the operand by a PP_MOC_TRUE / PP_MOC_FALSE symbol depending on
// whether a macro of that name exists.
543 Symbol definedOrNotDefined = symbol();
544 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
545 substituted += definedOrNotDefined;
549 } else if (token == PP_NEWLINE) {
// The newline symbol itself is copied to the output as well.
550 substituted += symbol();
// Any other token is copied through unchanged.
553 substituted += symbol();
// Recursive-descent evaluator for preprocessor conditions, built on Parser.
// Each grammar level is one member function returning the integer value of
// the sub-expression it parsed.
558 class PP_Expression : public Parser
// Entry point: restarts at the first symbol and evaluates the whole
// expression; yields 0 when the input does not start like an expression.
561 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
563 int conditional_expression();
564 int logical_OR_expression();
565 int logical_AND_expression();
566 int inclusive_OR_expression();
567 int exclusive_OR_expression();
568 int AND_expression();
569 int equality_expression();
570 int relational_expression();
571 int shift_expression();
572 int additive_expression();
573 int multiplicative_expression();
574 int unary_expression();
// The *_lookup() helpers return a bool and are used as guards before
// parsing (see value()).
575 bool unary_expression_lookup();
576 int primary_expression();
577 bool primary_expression_lookup();
// Parses `cond ? a : b`. Both alternatives are always parsed so that their
// tokens are consumed; the condition only selects which value is returned.
// A missing ':' part evaluates to 0.
580 int PP_Expression::conditional_expression()
582 int value = logical_OR_expression();
583 if (test(PP_QUESTION)) {
584 int alt1 = conditional_expression();
585 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
586 return value ? alt1 : alt2;
// Parses a logical-OR chain. The recursive call is the left operand of ||,
// so the right-hand tokens are always parsed regardless of `value`.
591 int PP_Expression::logical_OR_expression()
593 int value = logical_AND_expression();
595 return logical_OR_expression() || value;
// Parses a logical-AND chain. The recursive call is the left operand of &&,
// so the right-hand tokens are always parsed regardless of `value`.
599 int PP_Expression::logical_AND_expression()
601 int value = inclusive_OR_expression();
603 return logical_AND_expression() && value;
// Parses a bitwise-OR chain; the right-hand side is evaluated recursively.
607 int PP_Expression::inclusive_OR_expression()
609 int value = exclusive_OR_expression();
611 return value | inclusive_OR_expression();
// Parses a bitwise-XOR chain; the right-hand side is evaluated recursively.
615 int PP_Expression::exclusive_OR_expression()
617 int value = AND_expression();
619 return value ^ exclusive_OR_expression();
// Parses a bitwise-AND chain; the right-hand side is evaluated recursively.
623 int PP_Expression::AND_expression()
625 int value = equality_expression();
627 return value & AND_expression();
// Parses == and != comparisons. The right operand is obtained by recursing
// into equality_expression(), so the result is 0 or 1.
631 int PP_Expression::equality_expression()
633 int value = relational_expression();
636 return value == equality_expression();
638 return value != equality_expression();
// Parses <, >, <= and >= comparisons; the right operand is obtained by
// recursing into relational_expression().
645 int PP_Expression::relational_expression()
647 int value = shift_expression();
650 return value < relational_expression();
652 return value > relational_expression();
654 return value <= relational_expression();
656 return value >= relational_expression();
// Parses << and >> ; the shift count is obtained by recursing into
// shift_expression(). Operands are plain int.
663 int PP_Expression::shift_expression()
665 int value = additive_expression();
668 return value << shift_expression();
670 return value >> shift_expression();
// Parses + and - ; the right operand is obtained by recursing into
// additive_expression(), i.e. the rest of the chain is evaluated first.
677 int PP_Expression::additive_expression()
679 int value = multiplicative_expression();
682 return value + additive_expression();
684 return value - additive_expression();
// Parses *, % and / ; the right operand is obtained by recursing into
// multiplicative_expression().
691 int PP_Expression::multiplicative_expression()
693 int value = unary_expression();
696 return value * multiplicative_expression();
// Modulo by zero is defined to yield 0 instead of being evaluated.
699 int remainder = multiplicative_expression();
700 return remainder ? value % remainder : 0;
// Division by zero is likewise defined to yield 0.
704 int div = multiplicative_expression();
705 return div ? value / div : 0;
// Parses prefix operators applied to a unary expression: an identity form
// (presumably unary plus), arithmetic negation, logical not and bitwise
// complement. Anything else is parsed as a primary expression.
713 int PP_Expression::unary_expression()
717 return unary_expression();
719 return -unary_expression();
721 return !unary_expression();
723 return ~unary_expression();
730 return primary_expression();
// Guard used by value(): reports whether a unary expression can start here.
// A position where a primary expression can start qualifies; the further
// alternatives of the condition continue below.
734 bool PP_Expression::unary_expression_lookup()
737 return (primary_expression_lookup()
// Parses a primary expression: either a parenthesized conditional
// expression or a single token converted to an integer.
745 int PP_Expression::primary_expression()
748 if (test(PP_LPAREN)) {
749 value = conditional_expression();
// toInt(0, 0): no ok-flag is requested and base 0 lets the conversion pick
// the base from the literal's prefix. A lexem that is not a valid integer
// converts to 0.
753 value = lexem().toInt(0, 0);
// Reports whether the token `t` can start a primary expression.
// Identifiers, integer literals and floating literals qualify; the
// condition continues with further alternatives below.
758 bool PP_Expression::primary_expression_lookup()
761 return (t == PP_IDENTIFIER
762 || t == PP_INTEGER_LITERAL
763 || t == PP_FLOATING_LITERAL
// Evaluates the condition of a preprocessor conditional at the current
// position and returns its integer value (non-zero means true).
769 int Preprocessor::evaluateCondition()
771 PP_Expression expression;
// Hand the current file-name stack to the expression parser.
772 expression.currentFilenames = currentFilenames;
// Feed the expression parser with the macro-substituted symbols of the
// condition; no safeset argument is passed here.
774 substituteUntilNewline(expression.symbols);
776 return expression.value();
// Runs the preprocessor over the current `symbols` and appends the
// resulting symbols to `preprocessed`.
//
// filename     - name of the file being processed; pushed onto
//                currentFilenames for the duration of the call and used as
//                the base directory for resolving includes.
// preprocessed - output symbol list (appended to, not cleared).
//
// Includes are resolved and processed recursively; macro definitions are
// recorded in `macros`.
779 void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
781 currentFilenames.push(filename);
// Pre-size the output for roughly one output symbol per input symbol.
782 preprocessed.reserve(preprocessed.size() + symbols.size());
784 Token token = next();
// Line of the include statement; recorded in the MOC_INCLUDE_END symbol.
789 int lineNum = symbol().lineNum;
792 if (test(PP_STRING_LITERAL)) {
// A lexem starting with a double quote marks a "local" include.
793 local = lexem().startsWith('\"');
794 include = unquotedLexem();
// First candidate: relative to the directory of the including file.
802 fi.setFile(QFileInfo(QString::fromLocal8Bit(filename.constData())).dir(), QString::fromLocal8Bit(include.constData()));
// Then walk the configured include paths until a candidate exists.
803 for (int j = 0; j < Preprocessor::includes.size() && !fi.exists(); ++j) {
804 const IncludePath &p = Preprocessor::includes.at(j);
805 if (p.isFrameworkPath) {
// Framework lookup: "Name/header.h" is mapped to
// "<path>/Name.framework/Headers/header.h".
806 const int slashPos = include.indexOf('/');
809 QByteArray frameworkCandidate = include.left(slashPos);
810 frameworkCandidate.append(".framework/Headers/");
811 fi.setFile(QString::fromLocal8Bit(QByteArray(p.path + '/' + frameworkCandidate).constData()), QString::fromLocal8Bit(include.mid(slashPos + 1).constData()));
813 fi.setFile(QString::fromLocal8Bit(p.path.constData()), QString::fromLocal8Bit(include.constData()));
815 // try again, maybe there's a file later in the include paths with the same name
// Reject candidates that do not exist or are directories.
823 if (!fi.exists() || fi.isDir())
// From here on the include is identified by its canonical path.
825 include = fi.canonicalFilePath().toLocal8Bit();
// Each canonical include path is registered once in preprocessedIncludes.
827 if (Preprocessor::preprocessedIncludes.contains(include))
829 Preprocessor::preprocessedIncludes.insert(include);
831 QFile file(QString::fromLocal8Bit(include.constData()));
832 if (!file.open(QFile::ReadOnly))
835 QByteArray input = file.readAll();
// Save the parse state of the including file before switching to the
// included file's symbols.
840 Symbols saveSymbols = symbols;
841 int saveIndex = index;
843 // phase 1: get rid of backslash-newlines
844 input = cleaned(input);
846 // phase 2: tokenize for the preprocessor
847 symbols = tokenize(input);
852 // phase 3: preprocess conditions and substitute macros
// The included symbols are bracketed by MOC_INCLUDE_BEGIN (line 0) and
// MOC_INCLUDE_END (line of the include statement), both carrying the
// canonical include path.
853 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
854 preprocess(include, preprocessed);
855 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
857 symbols = saveSymbols;
864 QByteArray name = lexem();
// The macro body consists of the symbols from `start` up to, but not
// including, index - 1.
868 macro.symbols.reserve(index - start - 1);
869 for (int i = start; i < index - 1; ++i)
870 macro.symbols += symbols.at(i);
871 macros.insert(name, macro);
876 QByteArray name = lexem();
883 // if (macros.contains(symbol()))
886 // we _could_ easily substitute macros by the following
887 // four lines, but we choose not to.
889 if (macros.contains(sym.lexem())) {
890 preprocessed += substitute(macros, symbols, i);
897 continue; // skip unknown preprocessor statement
// Keep going while the condition under test evaluates to false.
901 while (!evaluateCondition()) {
// `signals` / `slots` keywords: when the macro QT_NO_KEYWORDS is defined
// they are passed on as ordinary identifiers, otherwise they are mapped to
// the Q_SIGNALS / Q_SLOTS tokens.
922 Symbol sym = symbol();
923 if (macros.contains("QT_NO_KEYWORDS"))
924 sym.token = IDENTIFIER;
926 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
// Default: copy the symbol to the output unchanged.
932 preprocessed += symbol();
// Balance the push at the top of the function.
935 currentFilenames.pop();
// Convenience overload taking a C FILE handle: opens `qfile` read-only on
// `file` and forwards to the QIODevice overload.
// NOTE(review): the result of open() is not checked in the line shown here.
938 Symbols Preprocessor::preprocessed(const QByteArray &filename, FILE *file)
941 qfile.open(file, QFile::ReadOnly);
942 return preprocessed(filename, &qfile);
// Reads the whole device, then runs the three preprocessing phases on it
// and returns the preprocessed symbols.
//
// filename - name passed on to preprocess() for the input.
// file     - device to read the source text from; read in one go.
945 Symbols Preprocessor::preprocessed(const QByteArray &filename, QIODevice *file)
947 QByteArray input = file->readAll();
951 // phase 1: get rid of backslash-newlines
952 input = cleaned(input);
954 // phase 2: tokenize for the preprocessor
955 symbols = tokenize(input);
// Dump of the tokenized symbols to stderr (presumably debug-only output).
958 for (int j = 0; j < symbols.size(); ++j)
959 fprintf(stderr, "line %d: %s(%s)\n",
961 symbols[j].lexem().constData(),
962 tokenTypeName(symbols[j].token));
965 // phase 3: preprocess conditions and substitute macros
967 preprocess(filename, result);
// Dump of the preprocessed symbols to stderr (presumably debug-only output).
970 for (int j = 0; j < result.size(); ++j)
971 fprintf(stderr, "line %d: %s(%s)\n",
973 result[j].lexem().constData(),
974 tokenTypeName(result[j].token));
// Consumes symbols until a token equal to `t` has been read or no further
// symbols are available.
980 void Preprocessor::until(Token t)
982 while(hasNext() && next() != t)