From: Kirill Bobyrev Date: Thu, 26 Jul 2018 18:25:48 +0000 (+0000) Subject: Revert Clangd Dex Iterators patch X-Git-Tag: llvmorg-7.0.0-rc1~494 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d75b556c563746a318ae92a50fb5c4d6dd180978;p=platform%2Fupstream%2Fllvm.git Revert Clangd Dex Iterators patch This reverts two revisions: * https://reviews.llvm.org/rL338017 * https://reviews.llvm.org/rL338028 They caused crash for Clang 3.6 & Clang 3.7 buildbots, it was reported by Jeremy Morse. llvm-svn: 338054 --- diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 33a6e84..07e23c2 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -43,7 +43,6 @@ add_clang_library(clangDaemon index/SymbolCollector.cpp index/SymbolYAML.cpp - index/dex/Iterator.cpp index/dex/Trigram.cpp LINK_LIBS diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp deleted file mode 100644 index 25107f9..0000000 --- a/clang-tools-extra/clangd/index/dex/Iterator.cpp +++ /dev/null @@ -1,244 +0,0 @@ -//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "Iterator.h" -#include -#include -#include - -namespace clang { -namespace clangd { -namespace dex { - -namespace { - -/// Implements Iterator over a PostingList. DocumentIterator is the most basic -/// iterator: it doesn't have any children (hence it is the leaf of iterator -/// tree) and is simply a wrapper around PostingList::const_iterator. -class DocumentIterator : public Iterator { -public: - DocumentIterator(PostingListRef Documents) - : Documents(Documents), Index(std::begin(Documents)) {} - - bool reachedEnd() const override { return Index == std::end(Documents); } - - /// Advances cursor to the next item. - void advance() override { - assert(!reachedEnd() && "DocumentIterator can't advance at the end."); - ++Index; - } - - /// Applies binary search to advance cursor to the next item with DocID equal - /// or higher than the given one. - void advanceTo(DocID ID) override { - assert(!reachedEnd() && "DocumentIterator can't advance at the end."); - Index = std::lower_bound(Index, std::end(Documents), ID); - } - - DocID peek() const override { - assert(!reachedEnd() && "DocumentIterator can't call peek() at the end."); - return *Index; - } - - llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << '['; - auto Separator = ""; - for (const auto &ID : Documents) { - OS << Separator << ID; - Separator = ", "; - } - OS << ']'; - return OS; - } - -private: - PostingListRef Documents; - PostingListRef::const_iterator Index; -}; - -/// Implements Iterator over the intersection of other iterators. -/// -/// AndIterator iterates through common items among all children. It becomes -/// exhausted as soon as any child becomes exhausted. After each mutation, the -/// iterator restores the invariant: all children must point to the same item. -class AndIterator : public Iterator { -public: - AndIterator(std::vector> AllChildren) - : Children(std::move(AllChildren)) { - assert(!Children.empty() && "AndIterator should have at least one child."); - // Establish invariants. - sync(); - } - - bool reachedEnd() const override { return ReachedEnd; } - - /// Advances all children to the next common item. - void advance() override { - assert(!reachedEnd() && "AndIterator can't call advance() at the end."); - Children.front()->advance(); - sync(); - } - - /// Advances all children to the next common item with DocumentID >= ID. - void advanceTo(DocID ID) override { - assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end."); - Children.front()->advanceTo(ID); - sync(); - } - - DocID peek() const override { return Children.front()->peek(); } - - llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << "(& "; - auto Separator = ""; - for (const auto &Child : Children) { - OS << Separator << *Child; - Separator = " "; - } - OS << ')'; - return OS; - } - -private: - /// Restores class invariants: each child will point to the same element after - /// sync. - void sync() { - ReachedEnd |= Children.front()->reachedEnd(); - if (ReachedEnd) - return; - auto SyncID = Children.front()->peek(); - // Indicates whether any child needs to be advanced to new SyncID. - bool NeedsAdvance = false; - do { - NeedsAdvance = false; - for (auto &Child : Children) { - Child->advanceTo(SyncID); - ReachedEnd |= Child->reachedEnd(); - // If any child reaches end And iterator can not match any other items. - // In this case, just terminate the process. - if (ReachedEnd) - return; - // If any child goes beyond given ID (i.e. ID is not the common item), - // all children should be advanced to the next common item. - // FIXME(kbobyrev): This is not a very optimized version; after costs - // are introduced, cycle should break whenever ID exceeds current one - // and cheapest children should be advanced over again. - if (Child->peek() > SyncID) { - SyncID = Child->peek(); - NeedsAdvance = true; - } - } - } while (NeedsAdvance); - } - - /// AndIterator owns its children and ensures that all of them point to the - /// same element. As soon as one child gets exhausted, AndIterator can no - /// longer advance and has reached its end. - std::vector> Children; - /// Indicates whether any child is exhausted. It is cheaper to maintain and - /// update the field, rather than traversing the whole subtree in each - /// reachedEnd() call. - bool ReachedEnd = false; -}; - -/// Implements Iterator over the union of other iterators. -/// -/// OrIterator iterates through all items which can be pointed to by at least -/// one child. To preserve the sorted order, this iterator always advances the -/// child with smallest Child->peek() value. OrIterator becomes exhausted as -/// soon as all of its children are exhausted. -class OrIterator : public Iterator { -public: - OrIterator(std::vector> AllChildren) - : Children(std::move(AllChildren)) { - assert(Children.size() > 0 && "Or Iterator must have at least one child."); - } - - /// Returns true if all children are exhausted. - bool reachedEnd() const override { - return std::all_of(begin(Children), end(Children), - [](const std::unique_ptr &Child) { - return Child->reachedEnd(); - }); - } - - /// Moves each child pointing to the smallest DocID to the next item. - void advance() override { - assert(!reachedEnd() && - "OrIterator must have at least one child to advance()."); - const auto SmallestID = peek(); - for (const auto &Child : Children) - if (!Child->reachedEnd() && Child->peek() == SmallestID) - Child->advance(); - } - - /// Advances each child to the next existing element with DocumentID >= ID. - void advanceTo(DocID ID) override { - assert(!reachedEnd() && "Can't advance iterator after it reached the end."); - for (const auto &Child : Children) - if (!Child->reachedEnd()) - Child->advanceTo(ID); - } - - /// Returns the element under cursor of the child with smallest Child->peek() - /// value. - DocID peek() const override { - assert(!reachedEnd() && - "OrIterator must have at least one child to peek()."); - DocID Result = std::numeric_limits::max(); - - for (const auto &Child : Children) - if (!Child->reachedEnd()) - Result = std::min(Result, Child->peek()); - - return Result; - } - - llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { - OS << "(| "; - auto Separator = ""; - for (const auto &Child : Children) { - OS << Separator << *Child; - Separator = " "; - } - OS << ')'; - return OS; - } - -private: - // FIXME(kbobyrev): Would storing Children in min-heap be faster? - std::vector> Children; -}; - -} // end namespace - -std::vector consume(Iterator &It) { - std::vector Result; - for (; !It.reachedEnd(); It.advance()) - Result.push_back(It.peek()); - return Result; -} - -std::unique_ptr create(PostingListRef Documents) { - return llvm::make_unique(Documents); -} - -std::unique_ptr -createAnd(std::vector> Children) { - return llvm::make_unique(move(Children)); -} - -std::unique_ptr -createOr(std::vector> Children) { - return llvm::make_unique(move(Children)); -} - -} // namespace dex -} // namespace clangd -} // namespace clang diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h deleted file mode 100644 index 21309f1..0000000 --- a/clang-tools-extra/clangd/index/dex/Iterator.h +++ /dev/null @@ -1,143 +0,0 @@ -//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Symbol index queries consist of specific requirements for the requested -// symbol, such as high fuzzy matching score, scope, type etc. The lists of all -// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope) -// are expressed in a form of Search Tokens which are stored in the inverted -// index. Inverted index maps these tokens to the posting lists - sorted ( by -// symbol quality) sequences of symbol IDs matching the token, e.g. scope token -// "clangd::clangd::" is mapped to the list of IDs of all symbols which are -// declared in this namespace. Search queries are build from a set of -// requirements which can be combined with each other forming the query trees. -// The leafs of such trees are posting lists, and the nodes are operations on -// these posting lists, e.g. intersection or union. Efficient processing of -// these multi-level queries is handled by Iterators. Iterators advance through -// all leaf posting lists producing the result of search query, which preserves -// the sorted order of IDs. Having the resulting IDs sorted is important, -// because it allows receiving a certain number of the most valuable items (e.g. -// symbols with highest quality which was the sorting key in the first place) -// without processing all items with requested properties (this might not be -// computationally effective if search request is not very restrictive). -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include - -namespace clang { -namespace clangd { -namespace dex { - -/// Symbol position in the list of all index symbols sorted by a pre-computed -/// symbol quality. -using DocID = uint32_t; -/// Contains sorted sequence of DocIDs all of which belong to symbols matching -/// certain criteria, i.e. containing a Search Token. PostingLists are values -/// for the inverted index. -using PostingList = std::vector; -/// Immutable reference to PostingList object. -using PostingListRef = llvm::ArrayRef; - -/// Iterator is the interface for Query Tree node. The simplest type of Iterator -/// is DocumentIterator which is simply a wrapper around PostingList iterator -/// and serves as the Query Tree leaf. More sophisticated examples of iterators -/// can manage intersection, union of the elements produced by other iterators -/// (their children) to form a multi-level Query Tree. The interface is designed -/// to be extensible in order to support multiple types of iterators. -class Iterator { - // FIXME(kbobyrev): Provide callback for matched documents. - // FIXME(kbobyrev): Implement new types of iterators: Label, Boost (with - // scoring), Limit. - // FIXME(kbobyrev): Implement iterator cost, an estimate of advance() calls - // before iterator exhaustion. -public: - /// Returns true if all valid DocIDs were processed and hence the iterator is - /// exhausted. - virtual bool reachedEnd() const = 0; - /// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted - /// and proceeds to the END. - /// - /// Note: reachedEnd() must be false. - virtual void advance() = 0; - /// Moves to the first valid DocID which is equal or higher than given ID. If - /// it doesn't exist, the iterator is exhausted and proceeds to the END. - /// - /// Note: reachedEnd() must be false. - virtual void advanceTo(DocID ID) = 0; - /// Returns the current element this iterator points to. - /// - /// Note: reachedEnd() must be false. - virtual DocID peek() const = 0; - - virtual ~Iterator() {} - - /// Prints a convenient human-readable iterator representation by recursively - /// dumping iterators in the following format: - /// - /// (Type Child1 Child2 ...) - /// - /// Where Type is the iterator type representation: "&" for And, "|" for Or, - /// ChildN is N-th iterator child. Raw iterators over PostingList are - /// represented as "[ID1, ID2, ...]" where IDN is N-th PostingList entry. - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const Iterator &Iterator) { - return Iterator.dump(OS); - } - -private: - virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; -}; - -/// Exhausts given iterator and returns all processed DocIDs. The result -/// contains sorted DocumentIDs. -std::vector consume(Iterator &It); - -/// Returns a document iterator over given PostingList. -std::unique_ptr create(PostingListRef Documents); - -/// Returns AND Iterator which performs the intersection of the PostingLists of -/// its children. -std::unique_ptr -createAnd(std::vector> Children); - -/// Returns OR Iterator which performs the union of the PostingLists of its -/// children. -std::unique_ptr -createOr(std::vector> Children); - -/// This allows createAnd({create(...), create(...)}) syntax. -template -std::unique_ptr -createAnd(std::unique_ptr(&&Children)[Size]) { - std::vector> Elements; - std::move(begin(Children), end(Children), std::back_inserter(Elements)); - return createAnd(move(Elements)); -} - -/// This allows createOr({create(...), create(...)}) syntax. -template -std::unique_ptr -createOr(std::unique_ptr(&&Children)[Size]) { - std::vector> Elements; - std::move(begin(Children), end(Children), std::back_inserter(Elements)); - return createOr(move(Elements)); -} - -} // namespace dex -} // namespace clangd -} // namespace clang - -#endif diff --git a/clang-tools-extra/unittests/clangd/DexIndexTests.cpp b/clang-tools-extra/unittests/clangd/DexIndexTests.cpp index c3be710..d48d4d8 100644 --- a/clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -7,13 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "index/dex/Iterator.h" #include "index/dex/Token.h" #include "index/dex/Trigram.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" #include "gtest/gtest.h" + #include #include @@ -23,226 +21,6 @@ namespace dex { using ::testing::ElementsAre; -TEST(DexIndexIterators, DocumentIterator) { - const PostingList L = {4, 7, 8, 20, 42, 100}; - auto DocIterator = create(L); - - EXPECT_EQ(DocIterator->peek(), 4U); - EXPECT_EQ(DocIterator->reachedEnd(), false); - - DocIterator->advance(); - EXPECT_EQ(DocIterator->peek(), 7U); - EXPECT_EQ(DocIterator->reachedEnd(), false); - - DocIterator->advanceTo(20); - EXPECT_EQ(DocIterator->peek(), 20U); - EXPECT_EQ(DocIterator->reachedEnd(), false); - - DocIterator->advanceTo(65); - EXPECT_EQ(DocIterator->peek(), 100U); - EXPECT_EQ(DocIterator->reachedEnd(), false); - - DocIterator->advanceTo(420); - EXPECT_EQ(DocIterator->reachedEnd(), true); -} - -TEST(DexIndexIterators, AndWithEmpty) { - const PostingList L0; - const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000}; - - auto AndEmpty = createAnd({create(L0)}); - EXPECT_EQ(AndEmpty->reachedEnd(), true); - - auto AndWithEmpty = createAnd({create(L0), create(L1)}); - EXPECT_EQ(AndWithEmpty->reachedEnd(), true); - - EXPECT_THAT(consume(*AndWithEmpty), ElementsAre()); -} - -TEST(DexIndexIterators, AndTwoLists) { - const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000}; - const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000}; - - auto And = createAnd({create(L1), create(L0)}); - - EXPECT_EQ(And->reachedEnd(), false); - EXPECT_THAT(consume(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U)); - - And = createAnd({create(L0), create(L1)}); - - And->advanceTo(0); - EXPECT_EQ(And->peek(), 0U); - And->advanceTo(5); - EXPECT_EQ(And->peek(), 7U); - And->advanceTo(10); - EXPECT_EQ(And->peek(), 10U); - And->advanceTo(42); - EXPECT_EQ(And->peek(), 320U); - And->advanceTo(8999); - EXPECT_EQ(And->peek(), 9000U); - And->advanceTo(9001); -} - -TEST(DexIndexIterators, AndThreeLists) { - const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000}; - const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000}; - const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000}; - - auto And = createAnd({create(L0), create(L1), create(L2)}); - EXPECT_EQ(And->peek(), 7U); - And->advanceTo(300); - EXPECT_EQ(And->peek(), 320U); - And->advanceTo(100000); - - EXPECT_EQ(And->reachedEnd(), true); -} - -TEST(DexIndexIterators, OrWithEmpty) { - const PostingList L0; - const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000}; - - auto OrEmpty = createOr({create(L0)}); - EXPECT_EQ(OrEmpty->reachedEnd(), true); - - auto OrWithEmpty = createOr({create(L0), create(L1)}); - EXPECT_EQ(OrWithEmpty->reachedEnd(), false); - - EXPECT_THAT(consume(*OrWithEmpty), - ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U)); -} - -TEST(DexIndexIterators, OrTwoLists) { - const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000}; - const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000}; - - auto Or = createOr({create(L0), create(L1)}); - - EXPECT_EQ(Or->reachedEnd(), false); - EXPECT_EQ(Or->peek(), 0U); - Or->advance(); - EXPECT_EQ(Or->peek(), 4U); - Or->advance(); - EXPECT_EQ(Or->peek(), 5U); - Or->advance(); - EXPECT_EQ(Or->peek(), 7U); - Or->advance(); - EXPECT_EQ(Or->peek(), 10U); - Or->advance(); - EXPECT_EQ(Or->peek(), 30U); - Or->advanceTo(42); - EXPECT_EQ(Or->peek(), 42U); - Or->advanceTo(300); - EXPECT_EQ(Or->peek(), 320U); - Or->advanceTo(9000); - EXPECT_EQ(Or->peek(), 9000U); - Or->advanceTo(9001); - EXPECT_EQ(Or->reachedEnd(), true); - - Or = createOr({create(L0), create(L1)}); - - EXPECT_THAT(consume(*Or), - ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U)); -} - -TEST(DexIndexIterators, OrThreeLists) { - const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000}; - const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000}; - const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000}; - - auto Or = createOr({create(L0), create(L1), create(L2)}); - - EXPECT_EQ(Or->reachedEnd(), false); - EXPECT_EQ(Or->peek(), 0U); - - Or->advance(); - EXPECT_EQ(Or->peek(), 1U); - - Or->advance(); - EXPECT_EQ(Or->peek(), 4U); - - Or->advanceTo(7); - - Or->advanceTo(59); - EXPECT_EQ(Or->peek(), 60U); - - Or->advanceTo(9001); - EXPECT_EQ(Or->reachedEnd(), true); -} - -// FIXME(kbobyrev): The testcase below is similar to what is expected in real -// queries. It should be updated once new iterators (such as boosting, limiting, -// etc iterators) appear. However, it is not exhaustive and it would be -// beneficial to implement automatic generation of query trees for more -// comprehensive testing. -TEST(DexIndexIterators, QueryTree) { - // An example of more complicated query - // - // +-----------------+ - // |And Iterator:1, 5| - // +--------+--------+ - // | - // | - // +------------------------------------+ - // | | - // | | - // +----------v----------+ +----------v---------+ - // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 5| - // +----------+----------+ +----------+---------+ - // | | - // +------+-----+ +---------+-----------+ - // | | | | | - // +-------v-----+ +----v-----+ +--v--+ +-V--+ +---v---+ - // |1, 3, 5, 8, 9| |1, 5, 7, 9| |Empty| |0, 5| |0, 1, 5| - // +-------------+ +----------+ +-----+ +----+ +-------+ - - const PostingList L0 = {1, 3, 5, 8, 9}; - const PostingList L1 = {1, 5, 7, 9}; - const PostingList L2 = {0, 5}; - const PostingList L3 = {0, 1, 5}; - const PostingList L4; - - // Root of the query tree: [1, 5] - auto Root = createAnd({ - // Lower And Iterator: [1, 5, 9] - createAnd({create(L0), create(L1)}), - // Lower Or Iterator: [0, 1, 5] - createOr({create(L2), create(L3), create(L4)}), - }); - - EXPECT_EQ(Root->reachedEnd(), false); - EXPECT_EQ(Root->peek(), 1U); - Root->advanceTo(0); - // Advance multiple times. Shouldn't do anything. - Root->advanceTo(1); - Root->advanceTo(0); - EXPECT_EQ(Root->peek(), 1U); - Root->advance(); - EXPECT_EQ(Root->peek(), 5U); - Root->advanceTo(5); - EXPECT_EQ(Root->peek(), 5U); - Root->advanceTo(9000); - EXPECT_EQ(Root->reachedEnd(), true); -} - -TEST(DexIndexIterators, StringRepresentation) { - const PostingList L0 = {4, 7, 8, 20, 42, 100}; - const PostingList L1 = {1, 3, 5, 8, 9}; - const PostingList L2 = {1, 5, 7, 9}; - const PostingList L3 = {0, 5}; - const PostingList L4 = {0, 1, 5}; - const PostingList L5; - - EXPECT_EQ(llvm::to_string(*(create(L0))), "[4, 7, 8, 20, 42, 100]"); - - auto Nested = createAnd({ - createAnd({create(L1), create(L2)}), - createOr({create(L3), create(L4), create(L5)}), - }); - - EXPECT_EQ(llvm::to_string(*Nested), - "(& (& [1, 3, 5, 8, 9] [1, 5, 7, 9]) (| [0, 5] [0, 1, 5] []))"); -} - testing::Matcher> trigramsAre(std::initializer_list Trigrams) { std::vector Tokens;