From: Jordan Rose Date: Thu, 21 Feb 2013 18:53:19 +0000 (+0000) Subject: Preprocessor: preserve whitespace in -traditional-cpp mode. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cb8a1aca35ecc33a5adcc0892aa31fb2a7750dc8;p=platform%2Fupstream%2Fllvm.git Preprocessor: preserve whitespace in -traditional-cpp mode. Note that unlike GNU cpp we currently do not preserve whitespace in macros (even in -traditional-cpp mode). llvm-svn: 175778 --- diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 535baf5..57e6c92 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -174,8 +174,8 @@ public: /// SetKeepWhitespaceMode - This method lets clients enable or disable /// whitespace retention mode. void SetKeepWhitespaceMode(bool Val) { - assert((!Val || LexingRawMode) && - "Can only enable whitespace retention in raw mode"); + assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) && + "Can only retain whitespace in raw mode or -traditional-cpp"); ExtendedTokenMode = Val ? 2 : 0; } @@ -194,6 +194,14 @@ public: ExtendedTokenMode = Mode ? 1 : 0; } + /// Sets the extended token mode back to its initial value, according to the + /// language options and preprocessor. This controls whether the lexer + /// produces comment and whitespace tokens. + /// + /// This requires the lexer to have an associated preprocessor. A standalone + /// lexer has nothing to reset to. + void resetExtendedTokenMode(); + const char *getBufferStart() const { return BufferStart; } /// ReadToEndOfLine - Read the rest of the current preprocessor line as an diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index c85945b..3d55adc 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -548,7 +548,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, // Tokens that can contain embedded newlines need to adjust our current // line number. - if (Tok.getKind() == tok::comment) + if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(TokPtr, Len); } else { std::string S = PP.getSpelling(Tok); @@ -556,7 +556,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, // Tokens that can contain embedded newlines need to adjust our current // line number. - if (Tok.getKind() == tok::comment) + if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(&S[0], S.size()); } Callbacks->setEmittedTokensOnThisLine(); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 0590d9e..65ea5e3 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), InputFile->getBufferEnd()); - // Default to keeping comments if the preprocessor wants them. - SetCommentRetentionState(PP.getCommentRetentionState()); + resetExtendedTokenMode(); +} + +void Lexer::resetExtendedTokenMode() { + assert(PP && "Cannot reset token mode without a preprocessor"); + if (LangOpts.TraditionalCPP) + SetKeepWhitespaceMode(true); + else + SetCommentRetentionState(PP->getCommentRetentionState()); } /// Lexer constructor - Create a new raw lexer object. This object is only @@ -1844,6 +1851,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, /// bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { // Whitespace - Skip it, then return the token after the whitespace. + bool SawNewline = isVerticalWhitespace(CurPtr[-1]); + unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently. while (1) { // Skip horizontal whitespace very aggressively. @@ -1851,7 +1860,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { Char = *++CurPtr; // Otherwise if we have something other than whitespace, we're done. - if (Char != '\n' && Char != '\r') + if (!isVerticalWhitespace(Char)) break; if (ParsingPreprocessorDirective) { @@ -1861,24 +1870,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) { } // ok, but handle newline. - // The returned token is at the start of the line. - Result.setFlag(Token::StartOfLine); - // No leading whitespace seen so far. - Result.clearFlag(Token::LeadingSpace); + SawNewline = true; Char = *++CurPtr; } - // If this isn't immediately after a newline, there is leading space. - char PrevChar = CurPtr[-1]; - if (PrevChar != '\n' && PrevChar != '\r') - Result.setFlag(Token::LeadingSpace); - // If the client wants us to return whitespace, return it now. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); + if (SawNewline) + IsAtStartOfLine = true; + // FIXME: The next token will not have LeadingSpace set. return true; } + // If this isn't immediately after a newline, there is leading space. + char PrevChar = CurPtr[-1]; + bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); + + Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); + if (SawNewline) + Result.setFlag(Token::StartOfLine); + BufferPtr = CurPtr; return false; } @@ -2269,7 +2281,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(*CurPtr)) { - Result.setFlag(Token::LeadingSpace); SkipWhitespace(Result, CurPtr+1); return false; } @@ -2351,7 +2362,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { FormTokenWithChars(Result, CurPtr, tok::eod); // Restore comment saving mode, in case it was disabled for directive. - SetCommentRetentionState(PP->getCommentRetentionState()); + resetExtendedTokenMode(); return true; // Have a token. } @@ -2718,6 +2729,7 @@ LexNextToken: // whitespace. if (isKeepWhitespaceMode()) { FormTokenWithChars(Result, CurPtr, tok::unknown); + // FIXME: The next token will not have LeadingSpace set. return; } @@ -2785,7 +2797,7 @@ LexNextToken: // Restore comment saving mode, in case it was disabled for directive. if (PP) - SetCommentRetentionState(PP->getCommentRetentionState()); + resetExtendedTokenMode(); // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; @@ -2793,8 +2805,7 @@ LexNextToken: Kind = tok::eod; break; } - // The returned token is at the start of the line. - Result.setFlag(Token::StartOfLine); + // No leading whitespace seen so far. Result.clearFlag(Token::LeadingSpace); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 1825028..54457c3 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -269,7 +269,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (Tok.isNot(tok::raw_identifier)) { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } @@ -285,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, FirstChar != 'i' && FirstChar != 'e') { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } @@ -302,7 +302,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (IdLen >= 20) { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); continue; } memcpy(DirectiveBuf, &DirectiveStr[0], IdLen); @@ -408,7 +408,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); + if (CurLexer) CurLexer->resetExtendedTokenMode(); } // Finally, if we are out of the conditional (saw an #endif or ran off the end @@ -594,6 +594,7 @@ void Preprocessor::HandleDirective(Token &Result) { // mode. Tell the lexer this so any newlines we see will be converted into an // EOD token (which terminates the directive). CurPPLexer->ParsingPreprocessorDirective = true; + if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); ++NumDirectives; @@ -638,14 +639,9 @@ void Preprocessor::HandleDirective(Token &Result) { // and reset to previous state when returning from this function. ResetMacroExpansionHelper helper(this); -TryAgain: switch (Result.getKind()) { case tok::eod: return; // null directive. - case tok::comment: - // Handle stuff like "# /*foo*/ define X" in -E -C mode. - LexUnexpandedToken(Result); - goto TryAgain; case tok::code_completion: if (CodeComplete) CodeComplete->CodeCompleteDirective( diff --git a/clang/test/Preprocessor/traditional-cpp.c b/clang/test/Preprocessor/traditional-cpp.c index 5fc9ee39..7202454 100644 --- a/clang/test/Preprocessor/traditional-cpp.c +++ b/clang/test/Preprocessor/traditional-cpp.c @@ -4,9 +4,61 @@ /* RUN: %clang_cc1 -traditional-cpp %s -E -o %t - RUN: FileCheck < %t %s + RUN: FileCheck -strict-whitespace < %t %s */ -/* CHECK: foo // bar +/* CHECK: {{^}}foo // bar{{$}} */ foo // bar + + +/* The lines in this file contain hard tab characters and trailing whitespace; + * do not change them! */ + +/* CHECK: {{^}} indented!{{$}} + * CHECK: {{^}}tab separated values{{$}} + */ + indented! +tab separated values + +#define bracket(x) >>>x<<< +bracket(| spaces |) +/* CHECK: {{^}}>>>| spaces |<<<{{$}} + */ + +/* This is still a preprocessing directive. */ +# define foo bar +foo! +- + foo! foo! +/* CHECK: {{^}}bar!{{$}} + * CHECK: {{^}} bar! bar! {{$}} + */ + +/* Deliberately check a leading newline with spaces on that line. */ + +# define foo bar +foo! +- + foo! foo! +/* CHECK: {{^}}bar!{{$}} + * CHECK: {{^}} bar! bar! {{$}} + */ + +/* FIXME: -traditional-cpp should not consider this a preprocessing directive + * because the # isn't in the first column. + */ + #define foo2 bar +foo2! +/* If this were working, both of these checks would be on. + * CHECK-NOT: {{^}} #define foo2 bar{{$}} + * CHECK-NOT: {{^}}foo2!{{$}} + */ + +/* FIXME: -traditional-cpp should not homogenize whitespace in macros. + */ +#define bracket2(x) >>> x <<< +bracket2(spaces) +/* If this were working, this check would be on. + * CHECK-NOT: {{^}}>>> spaces <<<{{$}} + */