From 5a33047022ca4b7863be05b4be75678d5c0a44ee Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Mon, 29 Apr 2019 13:32:36 +0000 Subject: [PATCH] FileCheck [2/12]: Stricter parsing of -D option Summary: This patch is part of a patch series to add support for FileCheck numeric expressions. This specific patch gives earlier and better diagnostics for the -D option. Prior to this change, parsing of -D option was very loose: it assumed that there is an equal sign (which to be fair is now checked by the FileCheck executable) and that the part on the left of the equal sign was a valid variable name. This commit adds logic to ensure that this is the case and gives diagnostic when it is not, making it clear that the issue came from a command-line option error. This is achieved by sharing the variable parsing code into a new function ParseVariable. Copyright: - Linaro (changes up to diff 183612 of revision D55940) - GraphCore (changes in later versions of revision D55940 and in new revision created off D55940) Reviewers: jhenderson, chandlerc, jdenny, probinson, grimar, arichardson, rnk Subscribers: hiraditya, llvm-commits, probinson, dblaikie, grimar, arichardson, tra, rnk, kristina, hfinkel, rogfer01, JonChesterfield Tags: #llvm Differential Revision: https://reviews.llvm.org/D60382 llvm-svn: 359447 --- llvm/include/llvm/Support/FileCheck.h | 13 ++- llvm/lib/Support/FileCheck.cpp | 155 ++++++++++++++++++++++--------- llvm/test/FileCheck/defines.txt | 16 +++- llvm/unittests/Support/FileCheckTest.cpp | 109 +++++++++++++++++++++- 4 files changed, 243 insertions(+), 50 deletions(-) diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h index 0b68610..4a63404 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/Support/FileCheck.h @@ -102,8 +102,10 @@ public: llvm::Optional getVarValue(StringRef VarName); /// Define variables from definitions given on the command line passed as a - /// vector of VAR=VAL strings in \p CmdlineDefines. - void defineCmdlineVariables(std::vector &CmdlineDefines); + /// vector of VAR=VAL strings in \p CmdlineDefines. Report any error to \p SM + /// and return whether an error occured. + bool defineCmdlineVariables(std::vector &CmdlineDefines, + SourceMgr &SM); /// Undefine local variables (variables whose name does not start with a '$' /// sign), i.e. remove them from GlobalVariableTable. @@ -153,6 +155,13 @@ public: /// Returns the pointer to the global state for all patterns in this /// FileCheck instance. FileCheckPatternContext *getContext() const { return Context; } + /// Return whether \p is a valid first character for a variable name. + static bool isValidVarNameStart(char C); + /// Verify that the string at the start of \p Str is a well formed variable. + /// Return false if it is and set \p IsPseudo to indicate if it is a pseudo + /// variable and \p TrailIdx to the position of the last character that is + /// part of the variable name. Otherwise, only return true. + static bool parseVariable(StringRef Str, bool &IsPseudo, unsigned &TrailIdx); bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, unsigned LineNumber, const FileCheckRequest &Req); size_t match(StringRef Buffer, size_t &MatchLen) const; diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index e1564a4..a80750d 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -24,6 +24,37 @@ using namespace llvm; +bool FileCheckPattern::isValidVarNameStart(char C) { + return C == '_' || isalpha(C); +} + +bool FileCheckPattern::parseVariable(StringRef Str, bool &IsPseudo, + unsigned &TrailIdx) { + if (Str.empty()) + return true; + + bool ParsedOneChar = false; + unsigned I = 0; + IsPseudo = Str[0] == '@'; + + // Global vars start with '$'. + if (Str[0] == '$' || IsPseudo) + ++I; + + for (unsigned E = Str.size(); I != E; ++I) { + if (!ParsedOneChar && !isValidVarNameStart(Str[I])) + return true; + + // Variable names are composed of alphanumeric characters and underscores. + if (Str[I] != '_' && !isalnum(Str[I])) + break; + ParsedOneChar = true; + } + + TrailIdx = I; + return false; +} + /// Parses the given string into the Pattern. /// /// \p Prefix provides which prefix is being matched, \p SM provides the @@ -117,9 +148,10 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix, // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject // it. This is to catch some common errors. if (PatternStr.startswith("[[")) { + StringRef MatchStr = PatternStr.substr(2); // Find the closing bracket pair ending the match. End is going to be an // offset relative to the beginning of the match string. - size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); + size_t End = FindRegexVarEnd(MatchStr, SM); if (End == StringRef::npos) { SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), @@ -128,55 +160,44 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix, return true; } - StringRef MatchStr = PatternStr.substr(2, End); + MatchStr = MatchStr.substr(0, End); PatternStr = PatternStr.substr(End + 4); - // Get the regex name (e.g. "foo"). - size_t NameEnd = MatchStr.find(':'); - StringRef Name = MatchStr.substr(0, NameEnd); + // Get the regex name (e.g. "foo") and verify it is well formed. + bool IsPseudo; + unsigned TrailIdx; + if (parseVariable(MatchStr, IsPseudo, TrailIdx)) { + SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()), + SourceMgr::DK_Error, "invalid name in named regex"); + return true; + } + + StringRef Name = MatchStr.substr(0, TrailIdx); + StringRef Trailer = MatchStr.substr(TrailIdx); + bool IsVarDef = (Trailer.find(":") != StringRef::npos); - if (Name.empty()) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, - "invalid name in named regex: empty name"); + if (IsVarDef && (IsPseudo || !Trailer.consume_front(":"))) { + SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()), + SourceMgr::DK_Error, + "invalid name in named regex definition"); return true; } // Verify that the name/expression is well formed. FileCheck currently // supports @LINE, @LINE+number, @LINE-number expressions. The check here - // is relaxed, more strict check is performed in \c EvaluateExpression. - bool IsExpression = false; - for (unsigned i = 0, e = Name.size(); i != e; ++i) { - if (i == 0) { - if (Name[i] == '$') // Global vars start with '$' - continue; - if (Name[i] == '@') { - if (NameEnd != StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data()), - SourceMgr::DK_Error, - "invalid name in named regex definition"); - return true; - } - IsExpression = true; - continue; + // is relaxed. A stricter check is performed in \c EvaluateExpression. + if (IsPseudo) { + for (unsigned I = 0, E = Trailer.size(); I != E; ++I) { + if (!isalnum(Trailer[I]) && Trailer[I] != '+' && Trailer[I] != '-') { + SM.PrintMessage(SMLoc::getFromPointer(Name.data() + I), + SourceMgr::DK_Error, "invalid name in named regex"); + return true; } } - if (Name[i] != '_' && !isalnum(Name[i]) && - (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), - SourceMgr::DK_Error, "invalid name in named regex"); - return true; - } - } - - // Name can't start with a digit. - if (isdigit(static_cast(Name[0]))) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, - "invalid name in named regex"); - return true; } // Handle [[foo]]. - if (NameEnd == StringRef::npos) { + if (!IsVarDef) { // Handle variables that were defined earlier on the same line by // emitting a backreference. if (VariableDefs.find(Name) != VariableDefs.end()) { @@ -189,7 +210,7 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix, } AddBackrefToRegEx(VarParenNum); } else { - VariableUses.push_back(std::make_pair(Name, RegExStr.size())); + VariableUses.push_back(std::make_pair(MatchStr, RegExStr.size())); } continue; } @@ -199,7 +220,7 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix, RegExStr += '('; ++CurParen; - if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) + if (AddRegExToRegEx(Trailer, CurParen, SM)) return true; RegExStr += ')'; @@ -755,7 +776,8 @@ FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, bool llvm::FileCheck::ReadCheckFile( SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, std::vector &CheckStrings) { - PatternContext.defineCmdlineVariables(Req.GlobalDefines); + if (PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM)) + return true; std::vector ImplicitNegativeChecks; for (const auto &PatternString : Req.ImplicitCheckNot) { @@ -1374,12 +1396,59 @@ Regex llvm::FileCheck::buildCheckPrefixRegex() { return Regex(PrefixRegexStr); } -void FileCheckPatternContext::defineCmdlineVariables( - std::vector &CmdlineDefines) { +bool FileCheckPatternContext::defineCmdlineVariables( + std::vector &CmdlineDefines, SourceMgr &SM) { assert(GlobalVariableTable.empty() && "Overriding defined variable with command-line variable definitions"); + + if (CmdlineDefines.empty()) + return false; + + // Create a string representing the vector of command-line definitions. Each + // definition is on its own line and prefixed with a definition number to + // clarify which definition a given diagnostic corresponds to. + unsigned I = 0; + bool ErrorFound = false; + std::string CmdlineDefsDiag; + StringRef Prefix1 = "Global define #"; + StringRef Prefix2 = ": "; for (StringRef CmdlineDef : CmdlineDefines) - GlobalVariableTable.insert(CmdlineDef.split('=')); + CmdlineDefsDiag += + (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str(); + + std::unique_ptr CmdLineDefsDiagBuffer = + MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); + StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); + SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); + + SmallVector CmdlineDefsDiagVec; + CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/, + false /*KeepEmpty*/); + for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) { + unsigned NameStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size(); + if (CmdlineDefDiag.substr(NameStart).find('=') == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(CmdlineDefDiag.data()), + SourceMgr::DK_Error, + "Missing equal sign in global definition"); + ErrorFound = true; + continue; + } + std::pair CmdlineNameVal = + CmdlineDefDiag.substr(NameStart).split('='); + StringRef Name = CmdlineNameVal.first; + bool IsPseudo; + unsigned TrailIdx; + if (FileCheckPattern::parseVariable(Name, IsPseudo, TrailIdx) || IsPseudo || + TrailIdx != Name.size() || Name.empty()) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, + "invalid name for variable definition '" + Name + "'"); + ErrorFound = true; + continue; + } + GlobalVariableTable.insert(CmdlineNameVal); + } + + return ErrorFound; } void FileCheckPatternContext::clearLocalVars() { diff --git a/llvm/test/FileCheck/defines.txt b/llvm/test/FileCheck/defines.txt index f262880..86113b8 100644 --- a/llvm/test/FileCheck/defines.txt +++ b/llvm/test/FileCheck/defines.txt @@ -1,6 +1,5 @@ ; RUN: FileCheck -DVALUE=10 -input-file %s %s ; RUN: not FileCheck -DVALUE=20 -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRMSG -; ; RUN: not FileCheck -DVALUE=10 -check-prefix NOT -input-file %s %s 2>&1 | FileCheck %s -check-prefix NOT-ERRMSG ; RUN: FileCheck -DVALUE=20 -check-prefix NOT -input-file %s %s ; RUN: not FileCheck -DVALUE10 -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRCLIEQ1 @@ -9,6 +8,9 @@ ; RUN: not FileCheck -D= -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRCLIVAR2 ; RUN: FileCheck -DVALUE= -check-prefix EMPTY -input-file %s %s 2>&1 +; RUN: not FileCheck -D10VALUE=10 -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLIFMT +; RUN: not FileCheck -D@VALUE=10 -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLIPSEUDO +; RUN: not FileCheck -D'VALUE + 2=10' -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLITRAIL Value = 10 ; CHECK: Value = [[VALUE]] ; NOT-NOT: Value = [[VALUE]] @@ -32,3 +34,15 @@ Value = 10 Empty value = @@ ; EMPTY: Empty value = @[[VALUE]]@ + +; ERRCLIFMT: Global defines:1:19: error: invalid name for variable definition '10VALUE' +; ERRCLIFMT-NEXT: Global define #1: 10VALUE=10 +; ERRCLIFMT-NEXT: {{^ \^$}} + +; ERRCLIPSEUDO: Global defines:1:19: error: invalid name for variable definition '@VALUE' +; ERRCLIPSEUDO-NEXT: Global define #1: @VALUE=10 +; ERRCLIPSEUDO-NEXT: {{^ \^$}} + +; ERRCLITRAIL: Global defines:1:19: error: invalid name for variable definition 'VALUE + 2' +; ERRCLITRAIL-NEXT: Global define #1: VALUE + 2=10 +; ERRCLITRAIL-NEXT: {{^ \^$}} diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp index bf2339c..497c808 100644 --- a/llvm/unittests/Support/FileCheckTest.cpp +++ b/llvm/unittests/Support/FileCheckTest.cpp @@ -14,31 +14,132 @@ namespace { class FileCheckTest : public ::testing::Test {}; +TEST_F(FileCheckTest, ValidVarNameStart) { + EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('a')); + EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('G')); + EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('_')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('2')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('$')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('@')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('+')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('-')); + EXPECT_FALSE(FileCheckPattern::isValidVarNameStart(':')); +} + +TEST_F(FileCheckTest, ParseVar) { + StringRef VarName = "GoodVar42"; + bool IsPseudo = true; + unsigned TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size()); + + VarName = "$GoodGlobalVar"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size()); + + VarName = "@GoodPseudoVar"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_TRUE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size()); + + VarName = "42BadVar"; + EXPECT_TRUE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + + VarName = "$@"; + EXPECT_TRUE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + + VarName = "B@dVar"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, 1U); + + VarName = "B$dVar"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, 1U); + + VarName = "BadVar+"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size() - 1); + + VarName = "BadVar-"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size() - 1); + + VarName = "BadVar:"; + IsPseudo = true; + TrailIdx = 0; + EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx)); + EXPECT_FALSE(IsPseudo); + EXPECT_EQ(TrailIdx, VarName.size() - 1); +} + TEST_F(FileCheckTest, FileCheckContext) { - FileCheckPatternContext Cxt; + FileCheckPatternContext Cxt = FileCheckPatternContext(); std::vector GlobalDefines; + SourceMgr SM; + + // Missing equal sign + GlobalDefines.emplace_back(std::string("LocalVar")); + EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM)); + + // Empty variable + GlobalDefines.clear(); + GlobalDefines.emplace_back(std::string("=18")); + EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM)); + + // Invalid variable + GlobalDefines.clear(); + GlobalDefines.emplace_back(std::string("18LocalVar=18")); + EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM)); - // Define local and global variables from command-line. + // Define local variables from command-line. + GlobalDefines.clear(); GlobalDefines.emplace_back(std::string("LocalVar=FOO")); - Cxt.defineCmdlineVariables(GlobalDefines); + GlobalDefines.emplace_back(std::string("EmptyVar=")); + bool GotError = Cxt.defineCmdlineVariables(GlobalDefines, SM); + EXPECT_FALSE(GotError); // Check defined variables are present and undefined is absent. StringRef LocalVarStr = "LocalVar"; + StringRef EmptyVarStr = "EmptyVar"; StringRef UnknownVarStr = "UnknownVar"; llvm::Optional LocalVar = Cxt.getVarValue(LocalVarStr); + llvm::Optional EmptyVar = Cxt.getVarValue(EmptyVarStr); llvm::Optional UnknownVar = Cxt.getVarValue(UnknownVarStr); EXPECT_TRUE(LocalVar); EXPECT_EQ(*LocalVar, "FOO"); + EXPECT_TRUE(EmptyVar); + EXPECT_EQ(*EmptyVar, ""); EXPECT_FALSE(UnknownVar); // Clear local variables and check they become absent. Cxt.clearLocalVars(); LocalVar = Cxt.getVarValue(LocalVarStr); EXPECT_FALSE(LocalVar); + EmptyVar = Cxt.getVarValue(EmptyVarStr); + EXPECT_FALSE(EmptyVar); // Redefine global variables and check variables are defined again. GlobalDefines.emplace_back(std::string("$GlobalVar=BAR")); - Cxt.defineCmdlineVariables(GlobalDefines); + GotError = Cxt.defineCmdlineVariables(GlobalDefines, SM); + EXPECT_FALSE(GotError); StringRef GlobalVarStr = "$GlobalVar"; llvm::Optional GlobalVar = Cxt.getVarValue(GlobalVarStr); EXPECT_TRUE(GlobalVar); -- 2.7.4