From f7a7ab59af582e121f7cfeaa84c75c344e437303 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 20 Dec 2016 23:09:09 +0000 Subject: [PATCH] Move GlobPattern class from LLD to llvm/Support. GlobPattern is a class to handle glob pattern matching. Currently only LLD is using that, but technically that feature is not specific to linkers, so in this patch I move that file to LLVM. Differential Revision: https://reviews.llvm.org/D27969 llvm-svn: 290212 --- lld/ELF/Strings.cpp | 134 ++--------------------- lld/ELF/Strings.h | 27 +---- llvm/include/llvm/Support/GlobPattern.h | 48 +++++++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/GlobPattern.cpp | 167 +++++++++++++++++++++++++++++ llvm/unittests/Support/CMakeLists.txt | 3 +- llvm/unittests/Support/GlobPatternTest.cpp | 70 ++++++++++++ 7 files changed, 297 insertions(+), 153 deletions(-) create mode 100644 llvm/include/llvm/Support/GlobPattern.h create mode 100644 llvm/lib/Support/GlobPattern.cpp create mode 100644 llvm/unittests/Support/GlobPatternTest.cpp diff --git a/lld/ELF/Strings.cpp b/lld/ELF/Strings.cpp index 47752a6..dcfb3fd 100644 --- a/lld/ELF/Strings.cpp +++ b/lld/ELF/Strings.cpp @@ -21,134 +21,14 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -// This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. -// This function returns the first token in S. -BitVector GlobPattern::scan(StringRef &S) { - switch (S[0]) { - case '*': - S = S.substr(1); - // '*' is represented by an empty bitvector. - // All other bitvectors are 256-bit long. - return BitVector(); - case '?': - S = S.substr(1); - return BitVector(256, true); - case '[': { - size_t End = S.find(']', 1); - if (End == StringRef::npos) { - error("invalid glob pattern: " + Original); - S = ""; - return BitVector(256, false); - } - StringRef Chars = S.substr(1, End - 1); - S = S.substr(End + 1); - if (Chars.startswith("^")) - return expand(Chars.substr(1)).flip(); - return expand(Chars); - } - default: - BitVector BV(256, false); - BV[S[0]] = true; - S = S.substr(1); - return BV; - } -} - -// Expands character ranges and returns a bitmap. -// For example, "a-cf-hz" is expanded to "abcfghz". -BitVector GlobPattern::expand(StringRef S) { - BitVector BV(256, false); - - // Expand "x-y". - for (;;) { - if (S.size() < 3) - break; - - // If it doesn't start with something like "x-y", - // consume the first character and proceed. - if (S[1] != '-') { - BV[S[0]] = true; - S = S.substr(1); - continue; - } - - // It must be in the form of "x-y". - // Validate it and then interpret the range. - if (S[0] > S[2]) { - error("invalid glob pattern: " + Original); - return BV; - } - for (int C = S[0]; C <= S[2]; ++C) - BV[C] = true; - S = S.substr(3); - } - - for (char C : S) - BV[C] = true; - return BV; -} - -GlobPattern::GlobPattern(StringRef S) : Original(S) { - if (!hasWildcard(S)) { - // S doesn't contain any metacharacter, - // so the regular string comparison should work. - Exact = S; - } else if (S.endswith("*") && !hasWildcard(S.drop_back())) { - // S is something like "foo*". We can use startswith(). - Prefix = S.drop_back(); - } else if (S.startswith("*") && !hasWildcard(S.drop_front())) { - // S is something like "*foo". We can use endswith(). - Suffix = S.drop_front(); - } else { - // Otherwise, we need to do real glob pattern matching. - // Parse the pattern now. - while (!S.empty()) - Tokens.push_back(scan(S)); - } -} - -bool GlobPattern::match(StringRef S) const { - if (Exact) - return S == *Exact; - if (Prefix) - return S.startswith(*Prefix); - if (Suffix) - return S.endswith(*Suffix); - return matchOne(Tokens, S); -} - -// Runs glob pattern Pats against string S. -bool GlobPattern::matchOne(ArrayRef Pats, StringRef S) const { - for (;;) { - if (Pats.empty()) - return S.empty(); - - // If Pats[0] is '*', try to match Pats[1..] against all possible - // substrings of S to see at least one pattern succeeds. - if (Pats[0].size() == 0) { - Pats = Pats.slice(1); - if (Pats.empty()) - // Fast path. If a pattern is '*', it matches anything. - return true; - for (size_t I = 0, E = S.size(); I < E; ++I) - if (matchOne(Pats, S.substr(I))) - return true; - return false; - } - - // If Pats[0] is not '*', it must consume one character. - if (S.empty() || !Pats[0][S[0]]) - return false; - Pats = Pats.slice(1); - S = S.substr(1); - } -} - StringMatcher::StringMatcher(const std::vector &Pat) { - for (StringRef S : Pat) - Patterns.push_back(GlobPattern(S)); + for (StringRef S : Pat) { + Expected Pat = GlobPattern::create(S); + if (!Pat) + error(toString(Pat.takeError())); + else + Patterns.push_back(*Pat); + } } bool StringMatcher::match(StringRef S) const { diff --git a/lld/ELF/Strings.h b/lld/ELF/Strings.h index 1387c27..6036cf5 100644 --- a/lld/ELF/Strings.h +++ b/lld/ELF/Strings.h @@ -15,6 +15,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/GlobPattern.h" #include namespace lld { @@ -56,30 +57,6 @@ private: mutable size_t Size; }; -// This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". -class GlobPattern { -public: - explicit GlobPattern(StringRef Pat); - bool match(StringRef S) const; - -private: - bool matchOne(ArrayRef Pat, StringRef S) const; - llvm::BitVector scan(StringRef &S); - llvm::BitVector expand(StringRef S); - - // Parsed glob pattern. - std::vector Tokens; - - // A glob pattern given to this class. This is for error reporting. - StringRef Original; - - // The following members are for optimization. - llvm::Optional Exact; - llvm::Optional Prefix; - llvm::Optional Suffix; -}; - // This class represents multiple glob patterns. class StringMatcher { public: @@ -89,7 +66,7 @@ public: bool match(StringRef S) const; private: - std::vector Patterns; + std::vector Patterns; }; // Returns a demangled C++ symbol name. If Name is not a mangled diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h new file mode 100644 index 0000000..c9436a1 --- /dev/null +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -0,0 +1,48 @@ +//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a glob pattern matcher. The glob pattern is the +// rule used by the shell. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_GLOB_PATTERN_H +#define LLVM_SUPPORT_GLOB_PATTERN_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include + +// This class represents a glob pattern. Supported metacharacters +// are "*", "?", "[]" and "[^]". +namespace llvm { +class BitVector; +template class ArrayRef; + +class GlobPattern { +public: + static Expected create(StringRef Pat); + bool match(StringRef S) const; + +private: + bool matchOne(ArrayRef Pat, StringRef S) const; + + // Parsed glob pattern. + std::vector Tokens; + + // The following members are for optimization. + Optional Exact; + Optional Prefix; + Optional Suffix; +}; +} + +#endif // LLVM_SUPPORT_GLOB_PATTERN_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index d6cbfbd..03addcb 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -56,6 +56,7 @@ add_llvm_library(LLVMSupport FoldingSet.cpp FormattedStream.cpp FormatVariadic.cpp + GlobPattern.cpp GraphWriter.cpp Hashing.cpp IntEqClasses.cpp diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp new file mode 100644 index 0000000..8ee2feb --- /dev/null +++ b/llvm/lib/Support/GlobPattern.cpp @@ -0,0 +1,167 @@ +//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a glob pattern matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GlobPattern.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" + +using namespace llvm; + +static bool hasWildcard(StringRef S) { + return S.find_first_of("?*[") != StringRef::npos; +} + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". +static Expected expand(StringRef S, StringRef Original) { + BitVector BV(256, false); + + // Expand X-Y. + for (;;) { + if (S.size() < 3) + break; + + // If it doesn't start with something like X-Y, + // consume the first character and proceed. + if (S[1] != '-') { + BV[S[0]] = true; + S = S.substr(1); + continue; + } + + // It must be in the form of X-Y. + // Validate it and then interpret the range. + if (S[0] > S[2]) + return make_error("invalid glob pattern: " + Original, + errc::invalid_argument); + + for (int C = S[0]; C <= S[2]; ++C) + BV[C] = true; + S = S.substr(3); + } + + for (char C : S) + BV[C] = true; + return BV; +} + +// This is a scanner for the glob pattern. +// A glob pattern token is one of "*", "?", "[]", "[^]" +// (which is a negative form of "[]"), or a non-meta character. +// This function returns the first token in S. +static Expected scan(StringRef &S, StringRef Original) { + switch (S[0]) { + case '*': + S = S.substr(1); + // '*' is represented by an empty bitvector. + // All other bitvectors are 256-bit long. + return BitVector(); + case '?': + S = S.substr(1); + return BitVector(256, true); + case '[': { + size_t End = S.find(']', 1); + if (End == StringRef::npos) + return make_error("invalid glob pattern: " + Original, + errc::invalid_argument); + + StringRef Chars = S.substr(1, End - 1); + S = S.substr(End + 1); + if (Chars.startswith("^")) { + Expected BV = expand(Chars.substr(1), Original); + if (!BV) + return BV.takeError(); + return BV->flip(); + } + return expand(Chars, Original); + } + default: + BitVector BV(256, false); + BV[S[0]] = true; + S = S.substr(1); + return BV; + } +} + +Expected GlobPattern::create(StringRef S) { + GlobPattern Pat; + + // S doesn't contain any metacharacter, + // so the regular string comparison should work. + if (!hasWildcard(S)) { + Pat.Exact = S; + return Pat; + } + + // S is something like "foo*". We can use startswith(). + if (S.endswith("*") && !hasWildcard(S.drop_back())) { + Pat.Prefix = S.drop_back(); + return Pat; + } + + // S is something like "*foo". We can use endswith(). + if (S.startswith("*") && !hasWildcard(S.drop_front())) { + Pat.Suffix = S.drop_front(); + return Pat; + } + + // Otherwise, we need to do real glob pattern matching. + // Parse the pattern now. + StringRef Original = S; + while (!S.empty()) { + Expected BV = scan(S, Original); + if (!BV) + return BV.takeError(); + Pat.Tokens.push_back(*BV); + } + return Pat; +} + +bool GlobPattern::match(StringRef S) const { + if (Exact) + return S == *Exact; + if (Prefix) + return S.startswith(*Prefix); + if (Suffix) + return S.endswith(*Suffix); + return matchOne(Tokens, S); +} + +// Runs glob pattern Pats against string S. +bool GlobPattern::matchOne(ArrayRef Pats, StringRef S) const { + for (;;) { + if (Pats.empty()) + return S.empty(); + + // If Pats[0] is '*', try to match Pats[1..] against all possible + // tail strings of S to see at least one pattern succeeds. + if (Pats[0].size() == 0) { + Pats = Pats.slice(1); + if (Pats.empty()) + // Fast path. If a pattern is '*', it matches anything. + return true; + for (size_t I = 0, E = S.size(); I < E; ++I) + if (matchOne(Pats, S.substr(I))) + return true; + return false; + } + + // If Pats[0] is not '*', it must consume one character. + if (S.empty() || !Pats[0][S[0]]) + return false; + Pats = Pats.slice(1); + S = S.substr(1); + } +} diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 71c28ea..51b3c47 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -17,10 +17,11 @@ add_llvm_unittest(SupportTests DwarfTest.cpp EndianStreamTest.cpp EndianTest.cpp - ErrorTest.cpp ErrorOrTest.cpp + ErrorTest.cpp FileOutputBufferTest.cpp FormatVariadicTest.cpp + GlobPatternTest.cpp Host.cpp LEB128Test.cpp LineIteratorTest.cpp diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp new file mode 100644 index 0000000..44d7726 --- /dev/null +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -0,0 +1,70 @@ +//===- llvm/unittest/Support/GlobPatternTest.cpp --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GlobPattern.h" +#include "gtest/gtest.h" + +using namespace llvm; +namespace { + +class GlobPatternTest : public ::testing::Test {}; + +TEST_F(GlobPatternTest, Basics) { + Expected Pat1 = GlobPattern::create(""); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("")); + EXPECT_FALSE(Pat1->match("a")); + + Expected Pat2 = GlobPattern::create("ab*c*def"); + EXPECT_TRUE((bool)Pat2); + EXPECT_TRUE(Pat2->match("abcdef")); + EXPECT_TRUE(Pat2->match("abxcxdef")); + EXPECT_FALSE(Pat2->match("")); + EXPECT_FALSE(Pat2->match("xabcdef")); + EXPECT_FALSE(Pat2->match("abcdefx")); + + Expected Pat3 = GlobPattern::create("a??c"); + EXPECT_TRUE((bool)Pat3); + EXPECT_TRUE(Pat3->match("axxc")); + EXPECT_FALSE(Pat3->match("axxx")); + EXPECT_FALSE(Pat3->match("")); + + Expected Pat4 = GlobPattern::create("[abc-fy-z]"); + EXPECT_TRUE((bool)Pat4); + EXPECT_TRUE(Pat4->match("a")); + EXPECT_TRUE(Pat4->match("b")); + EXPECT_TRUE(Pat4->match("c")); + EXPECT_TRUE(Pat4->match("d")); + EXPECT_TRUE(Pat4->match("e")); + EXPECT_TRUE(Pat4->match("f")); + EXPECT_TRUE(Pat4->match("y")); + EXPECT_TRUE(Pat4->match("z")); + EXPECT_FALSE(Pat4->match("g")); + EXPECT_FALSE(Pat4->match("")); + + Expected Pat5 = GlobPattern::create("[^abc-fy-z]"); + EXPECT_TRUE((bool)Pat5); + EXPECT_TRUE(Pat5->match("g")); + EXPECT_FALSE(Pat5->match("a")); + EXPECT_FALSE(Pat5->match("b")); + EXPECT_FALSE(Pat5->match("c")); + EXPECT_FALSE(Pat5->match("d")); + EXPECT_FALSE(Pat5->match("e")); + EXPECT_FALSE(Pat5->match("f")); + EXPECT_FALSE(Pat5->match("y")); + EXPECT_FALSE(Pat5->match("z")); + EXPECT_FALSE(Pat5->match("")); +} + +TEST_F(GlobPatternTest, Invalid) { + Expected Pat1 = GlobPattern::create("["); + EXPECT_FALSE((bool)Pat1); + handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {}); +} +} -- 2.7.4