From 40d6663367b7e7c0cc5c3686e18fe75facd30403 Mon Sep 17 00:00:00 2001 From: Mitch Phillips Date: Tue, 7 Nov 2017 21:16:46 +0000 Subject: [PATCH] Extend SpecialCaseList to allow users to blame matches on entries in the file. Summary: Extends SCL functionality to allow users to find the line number in the file the SCL is built from through SpecialCaseList::inSectionBlame(...). Also removes the need to compile the SCL before use. As the matcher now contains a list of regexes to test against instead of a single regex, the regexes can be individually built on each insertion rather than one large compilation at the end of construction. This change also fixes a bug where blank lines would cause the parser to become out-of-sync with the line number. An error on line `k` was being reported as being on line `k - num_blank_lines_before_k`. Note: This change has a cyclical dependency on D39486. Both these changes must be submitted at the same time to avoid a build breakage. Reviewers: vlad.tsyrklevich Reviewed By: vlad.tsyrklevich Subscribers: kcc, pcc, llvm-commits Differential Revision: https://reviews.llvm.org/D39485 llvm-svn: 317617 --- llvm/include/llvm/Support/SpecialCaseList.h | 37 +++++++----- llvm/lib/Support/SpecialCaseList.cpp | 83 ++++++++++++-------------- llvm/unittests/Support/SpecialCaseListTest.cpp | 24 ++++++++ 3 files changed, 83 insertions(+), 61 deletions(-) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index f76ca30..fd62fc4 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -89,6 +89,17 @@ public: bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category = StringRef()) const; + /// Returns the line number corresponding to the special case list entry if + /// the special case list contains a line + /// \code + /// @Prefix:=@Category + /// \endcode + /// where @Query satisfies wildcard expression in a given @Section. + /// Returns zero if there is no blacklist entry corresponding to this + /// expression. + unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, + StringRef Category = StringRef()) const; + protected: // Implementations of the create*() functions that can also be used by derived // classes. @@ -96,25 +107,25 @@ protected: std::string &Error); bool createInternal(const MemoryBuffer *MB, std::string &Error); + SpecialCaseList() = default; SpecialCaseList(SpecialCaseList const &) = delete; SpecialCaseList &operator=(SpecialCaseList const &) = delete; /// Represents a set of regular expressions. Regular expressions which are - /// "literal" (i.e. no regex metacharacters) are stored in Strings, while all - /// others are represented as a single pipe-separated regex in RegEx. The - /// reason for doing so is efficiency; StringSet is much faster at matching + /// "literal" (i.e. no regex metacharacters) are stored in Strings. The + /// reason for doing so is efficiency; StringMap is much faster at matching /// literal strings than Regex. class Matcher { public: - bool insert(std::string Regexp, std::string &REError); - void compile(); - bool match(StringRef Query) const; + bool insert(std::string Regexp, unsigned LineNumber, std::string &REError); + // Returns the line number in the source file that this query matches to. + // Returns zero if no match is found. + unsigned match(StringRef Query) const; private: - StringSet<> Strings; + StringMap Strings; TrigramIndex Trigrams; - std::unique_ptr RegEx; - std::string UncompiledRegEx; + std::vector, unsigned>> RegExes; }; using SectionEntries = StringMap>; @@ -127,19 +138,15 @@ protected: }; std::vector
Sections; - bool IsCompiled; - SpecialCaseList(); /// Parses just-constructed SpecialCaseList entries from a memory buffer. bool parse(const MemoryBuffer *MB, StringMap &SectionsMap, std::string &Error); - /// compile() should be called once, after parsing all the memory buffers. - void compile(); // Helper method for derived classes to search by Prefix, Query, and Category // once they have already resolved a section entry. - bool inSection(const SectionEntries &Entries, StringRef Prefix, - StringRef Query, StringRef Category) const; + unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix, + StringRef Query, StringRef Category) const; }; } // namespace llvm diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index a659a2a..bf807e6 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -27,6 +27,7 @@ namespace llvm { bool SpecialCaseList::Matcher::insert(std::string Regexp, + unsigned LineNumber, std::string &REError) { if (Regexp.empty()) { REError = "Supplied regexp was blank"; @@ -34,7 +35,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, } if (Regex::isLiteralERE(Regexp)) { - Strings.insert(Regexp); + Strings[Regexp] = LineNumber; return true; } Trigrams.insert(Regexp); @@ -45,34 +46,30 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, Regexp.replace(pos, strlen("*"), ".*"); } + Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str(); + // Check that the regexp is valid. Regex CheckRE(Regexp); if (!CheckRE.isValid(REError)) return false; - if (!UncompiledRegEx.empty()) - UncompiledRegEx += "|"; - UncompiledRegEx += "^(" + Regexp + ")$"; + RegExes.emplace_back( + std::make_pair(make_unique(std::move(CheckRE)), LineNumber)); return true; } -void SpecialCaseList::Matcher::compile() { - if (!UncompiledRegEx.empty()) { - RegEx.reset(new Regex(UncompiledRegEx)); - UncompiledRegEx.clear(); - } -} - -bool SpecialCaseList::Matcher::match(StringRef Query) const { - if (Strings.count(Query)) - return true; +unsigned SpecialCaseList::Matcher::match(StringRef Query) const { + auto It = Strings.find(Query); + if (It != Strings.end()) + return It->second; if (Trigrams.isDefinitelyOut(Query)) return false; - return RegEx && RegEx->match(Query); + for (auto& RegExKV : RegExes) + if (RegExKV.first->match(Query)) + return RegExKV.second; + return 0; } -SpecialCaseList::SpecialCaseList() : Sections(), IsCompiled(false) {} - std::unique_ptr SpecialCaseList::create(const std::vector &Paths, std::string &Error) { @@ -114,7 +111,6 @@ bool SpecialCaseList::createInternal(const std::vector &Paths, return false; } } - compile(); return true; } @@ -123,7 +119,6 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, StringMap Sections; if (!parse(MB, Sections, Error)) return false; - compile(); return true; } @@ -132,11 +127,13 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { // Iterate through each line in the blacklist file. SmallVector Lines; - SplitString(MB->getBuffer(), Lines, "\n\r"); + MB->getBuffer().split(Lines, '\n'); - int LineNo = 1; + unsigned LineNo = 1; StringRef Section = "*"; + for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) { + *I = I->trim(); // Ignore empty lines and lines starting with "#" if (I->empty() || I->startswith("#")) continue; @@ -181,11 +178,10 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, if (SectionsMap.find(Section) == SectionsMap.end()) { std::unique_ptr M = make_unique(); std::string REError; - if (!M->insert(Section, REError)) { + if (!M->insert(Section, LineNo, REError)) { Error = (Twine("malformed section ") + Section + ": '" + REError).str(); return false; } - M->compile(); SectionsMap[Section] = Sections.size(); Sections.emplace_back(std::move(M)); @@ -193,7 +189,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category]; std::string REError; - if (!Entry.insert(std::move(Regexp), REError)) { + if (!Entry.insert(std::move(Regexp), LineNo, REError)) { Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" + SplitLine.second + "': " + REError).str(); return false; @@ -202,38 +198,33 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, return true; } -void SpecialCaseList::compile() { - assert(!IsCompiled && "compile() should only be called once"); - // Iterate through every section compiling regular expressions for every query - // and creating Section entries. - for (auto &Section : Sections) - for (auto &Prefix : Section.Entries) - for (auto &Category : Prefix.getValue()) - Category.getValue().compile(); - - IsCompiled = true; -} - SpecialCaseList::~SpecialCaseList() {} bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { - assert(IsCompiled && "SpecialCaseList::compile() was not called!"); + return inSectionBlame(Section, Prefix, Query, Category); +} +unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, + StringRef Query, + StringRef Category) const { for (auto &SectionIter : Sections) - if (SectionIter.SectionMatcher->match(Section) && - inSection(SectionIter.Entries, Prefix, Query, Category)) - return true; - - return false; + if (SectionIter.SectionMatcher->match(Section)) { + unsigned Blame = + inSectionBlame(SectionIter.Entries, Prefix, Query, Category); + if (Blame) + return Blame; + } + return 0; } -bool SpecialCaseList::inSection(const SectionEntries &Entries, StringRef Prefix, - StringRef Query, StringRef Category) const { +unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, + StringRef Prefix, StringRef Query, + StringRef Category) const { SectionEntries::const_iterator I = Entries.find(Prefix); - if (I == Entries.end()) return false; + if (I == Entries.end()) return 0; StringMap::const_iterator II = I->second.find(Category); - if (II == I->second.end()) return false; + if (II == I->second.end()) return 0; return II->getValue().match(Query); } diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp index 9e1223b..060703e 100644 --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -58,6 +58,30 @@ TEST_F(SpecialCaseListTest, Basic) { EXPECT_FALSE(SCL->inSection("", "src", "hi")); EXPECT_FALSE(SCL->inSection("", "fun", "hello")); EXPECT_FALSE(SCL->inSection("", "src", "hello", "category")); + + EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello")); + EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye")); + EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category")); + EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category")); + EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi")); + EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello")); + EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category")); +} + +TEST_F(SpecialCaseListTest, CorrectErrorLineNumberWithBlankLine) { + std::string Error; + EXPECT_EQ(nullptr, makeSpecialCaseList("# This is a comment.\n" + "\n" + "[not valid\n", + Error)); + EXPECT_TRUE( + ((StringRef)Error).startswith("malformed section header on line 3:")); + + EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n" + "[not valid\n", + Error)); + EXPECT_TRUE( + ((StringRef)Error).startswith("malformed section header on line 4:")); } TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) { -- 2.7.4