From 9263ae3b5ac8be8fc6e6011b4e36b2faf372520f Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Tue, 2 Aug 2016 19:17:54 +0000 Subject: [PATCH] Fix handling of end-of-line preprocessor comments Attempt 2 Attempt 2: Retrying after Tsan.mman test fix. Attempt 1: Recommitting after fixing test. When parsing assembly where the line comment syntax is not hash, the lexer cannot distinguish between hashes that start a hash line comment and ones that are part of an assembly statement, so the two must be distinguished during parsing. Previously, this was incompletely handled by not checking for EndOfStatement at the end of statements and interpreting hash-prefixed statements as comments. Change EndOfStatement Parsing to check for Hash comments and reintroduce Hash statement parsing to catch previously handled cases. Reviewers: rnk, majnemer Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D23017 llvm-svn: 277501 --- llvm/lib/MC/MCParser/AsmParser.cpp | 34 ++++++++++++++++++++++++++++++++ llvm/test/MC/ARM/preserve-comments-arm.s | 10 ++++++++++ 2 files changed, 44 insertions(+) create mode 100644 llvm/test/MC/ARM/preserve-comments-arm.s diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index d85f1c6..6d3240a 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -262,9 +262,23 @@ public: return false; } + bool parseEOL(const Twine &ErrMsg) { + if (getTok().getKind() == AsmToken::Hash) { + StringRef CommentStr = parseStringToEndOfStatement(); + Lexer.Lex(); + Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr)); + } + if (getTok().getKind() != AsmToken::EndOfStatement) + return TokError(ErrMsg); + Lex(); + return false; + } + /// parseToken - If current token has the specified kind, eat it and /// return success. Otherwise, emit the specified error and return failure. 
bool parseToken(AsmToken::TokenKind T, const Twine &ErrMsg) { + if (T == AsmToken::EndOfStatement) + return parseEOL(ErrMsg); if (getTok().getKind() != T) return TokError(ErrMsg); Lex(); @@ -1409,6 +1423,16 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); return false; } + if (Lexer.is(AsmToken::Hash)) { + // Seeing a hash here means that it was an end-of-line comment in + // an asm syntax where hash's are not comment and the previous + // statement parser did not check the end of statement. Relex as + // EndOfStatement. + StringRef CommentStr = parseStringToEndOfStatement(); + Lexer.Lex(); + Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr)); + return false; + } // Statements always start with an identifier. AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); @@ -1542,6 +1566,16 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, if (!Sym->isUndefined() || Sym->isVariable()) return Error(IDLoc, "invalid symbol redefinition"); + // End of Labels should be treated as end of line for lexing + // purposes but that information is not available to the Lexer who + // does not understand Labels. This may cause us to see a Hash + // here instead of a preprocessor line comment. + if (getTok().is(AsmToken::Hash)) { + StringRef CommentStr = parseStringToEndOfStatement(); + Lexer.Lex(); + Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr)); + } + // Consume any end of statement token, if present, to avoid spurious // AddBlankLine calls(). if (getTok().is(AsmToken::EndOfStatement)) { diff --git a/llvm/test/MC/ARM/preserve-comments-arm.s b/llvm/test/MC/ARM/preserve-comments-arm.s new file mode 100644 index 0000000..4c48009 --- /dev/null +++ b/llvm/test/MC/ARM/preserve-comments-arm.s @@ -0,0 +1,10 @@ + @RUN: llvm-mc -preserve-comments -n -triple arm-eabi < %s > %t + @RUN: sed 's/#[C]omment/@Comment/g' %s > %t2 + @RUN: diff %t %t2 + .text + + mov r0, r0 +foo: #Comment here + mov r0, r0 @ EOL comment + .ident "" + -- 2.7.4