From 9f2bd4e7eacccd5e29747f38ac033562fd85bd5e Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Sat, 1 Oct 2016 00:42:32 +0000 Subject: [PATCH] [MC] Prevent out of order HashDirective lexing in AsmLexer. To lex hash directives we peek ahead to find component tokens, create a unified token, and unlex the peeked tokens so the parser does not need to parse the tokens then. Make sure we do not lex another hash directive during the peek operation. This fixes PR28921. Reviewers: rnk, loladiro Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D24839 llvm-svn: 282992 --- llvm/include/llvm/MC/MCParser/AsmLexer.h | 2 +- llvm/lib/MC/MCParser/AsmLexer.cpp | 43 +++++++++++++------------------- llvm/test/MC/AsmParser/pr28921.s | 8 ++++++ 3 files changed, 26 insertions(+), 27 deletions(-) create mode 100644 llvm/test/MC/AsmParser/pr28921.s diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h index 06937e2..029598c 100644 --- a/llvm/include/llvm/MC/MCParser/AsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h @@ -32,7 +32,7 @@ class AsmLexer : public MCAsmLexer { bool IsAtStartOfLine; bool IsAtStartOfStatement; bool IsParsingMSInlineAsm; - + bool IsPeeking; void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index c1f71bc..d0c8bce 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -11,29 +11,29 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCParser/AsmLexer.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" 
+#include "llvm/Support/SaveAndRestore.h" #include #include #include #include -#include #include +#include #include using namespace llvm; -AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { - CurPtr = nullptr; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; - IsParsingMSInlineAsm = false; +AsmLexer::AsmLexer(const MCAsmInfo &MAI) + : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), + IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), + IsPeeking(false) { AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -487,17 +487,15 @@ StringRef AsmLexer::LexUntilEndOfLine() { size_t AsmLexer::peekTokens(MutableArrayRef Buf, bool ShouldSkipSpace) { - const char *SavedTokStart = TokStart; - const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = IsAtStartOfLine; - bool SavedAtStartOfStatement = IsAtStartOfStatement; - bool SavedSkipSpace = SkipSpace; - + SaveAndRestore SavedTokenStart(TokStart); + SaveAndRestore SavedCurPtr(CurPtr); + SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); + SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); + SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); + SaveAndRestore SavedIsPeeking(IsPeeking, true); std::string SavedErr = getErr(); SMLoc SavedErrLoc = getErrLoc(); - SkipSpace = ShouldSkipSpace; - size_t ReadCount; for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { AsmToken Token = LexToken(); @@ -509,13 +507,6 @@ size_t AsmLexer::peekTokens(MutableArrayRef Buf, } SetError(SavedErrLoc, SavedErr); - - SkipSpace = SavedSkipSpace; - IsAtStartOfLine = SavedAtStartOfLine; - IsAtStartOfStatement = SavedAtStartOfStatement; - CurPtr = SavedCurPtr; - TokStart = SavedTokStart; - return ReadCount; } @@ -525,7 +516,7 @@ bool AsmLexer::isAtStartOfComment(const char *Ptr) { if (CommentString[1] == '\0') return CommentString[0] == Ptr[0]; - // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin + // Allow # preprocessor comments to also be counted as 
comments for "##" cases if (CommentString[1] == '#') return CommentString[0] == Ptr[0]; @@ -542,7 +533,7 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (CurChar == '#' && IsAtStartOfStatement) { + if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { // If this starts with a '#', this may be a cpp // hash directive and otherwise a line comment. AsmToken TokenBuf[2]; diff --git a/llvm/test/MC/AsmParser/pr28921.s b/llvm/test/MC/AsmParser/pr28921.s new file mode 100644 index 0000000..2fbb555 --- /dev/null +++ b/llvm/test/MC/AsmParser/pr28921.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple i386-unknown-unknown %s + +# 1 "kernel.S" +# 1 "" 1 +# 1 "kernel.S" 2 +## +# 10 "kernel.S" +## -- 2.7.4