// using 100x of the requested number might not be good in practice, e.g.
// when the requested number of items is small.
const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+ auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
// FIXME(kbobyrev): Add boosting to the query and utilize retrieved
// boosting scores.
- std::vector<std::pair<DocID, float>> SymbolDocIDs =
- consume(*QueryIterator, ItemsToRetrieve);
+ std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
// Retrieve top Req.MaxCandidateCount items.
std::priority_queue<std::pair<float, const Symbol *>> Top;
return *Index;
}
- float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; }
+ float consume() override { return DEFAULT_BOOST_SCORE; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
DocID peek() const override { return Children.front()->peek(); }
- // If not exhausted and points to the given item, consume() returns the
- // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is
- // returned.
- float consume(DocID ID) override {
- if (reachedEnd() || peek() != ID)
- return DEFAULT_BOOST_SCORE;
+ float consume() override {
+ assert(!reachedEnd() && "AndIterator can't consume() at the end.");
return std::accumulate(
begin(Children), end(Children), DEFAULT_BOOST_SCORE,
[&](float Current, const std::unique_ptr<Iterator> &Child) {
- return Current * Child->consume(ID);
+ return Current * Child->consume();
});
}
// Returns the maximum boosting score among all Children when iterator is not
// exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
- float consume(DocID ID) override {
- if (reachedEnd() || peek() != ID)
- return DEFAULT_BOOST_SCORE;
+ float consume() override {
+ assert(!reachedEnd() &&
+ "OrIterator can't consume() after it reached the end.");
+ const DocID ID = peek();
return std::accumulate(
begin(Children), end(Children), DEFAULT_BOOST_SCORE,
- [&](float Current, const std::unique_ptr<Iterator> &Child) {
+ [&](float Boost, const std::unique_ptr<Iterator> &Child) {
return (!Child->reachedEnd() && Child->peek() == ID)
- ? std::max(Current, Child->consume(ID))
- : Current;
+ ? std::max(Boost, Child->consume())
+ : Boost;
});
}
return Index;
}
- float consume(DocID) override { return DEFAULT_BOOST_SCORE; }
+ float consume() override { return DEFAULT_BOOST_SCORE; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
DocID peek() const override { return Child->peek(); }
- float consume(DocID ID) override { return Child->consume(ID) * Factor; }
+ float consume() override { return Child->consume() * Factor; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
float Factor;
};
+/// This iterator limits the number of items retrieved from the child iterator
+/// on top of the query tree. To ensure that query tree with LIMIT iterators
+/// inside works correctly, users have to call Root->consume(Root->peek()) each
+/// time item is retrieved at the root of query tree.
+class LimitIterator : public Iterator {
+public:
+ LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
+ : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {}
+
+ bool reachedEnd() const override {
+ return ItemsLeft == 0 || Child->reachedEnd();
+ }
+
+ void advance() override { Child->advance(); }
+
+ void advanceTo(DocID ID) override { Child->advanceTo(ID); }
+
+ DocID peek() const override { return Child->peek(); }
+
+ /// Decreases the limit in case the element consumed at top of the query tree
+ /// comes from the underlying iterator.
+ float consume() override {
+ assert(!reachedEnd() && "LimitIterator can't consume at the end.");
+ --ItemsLeft;
+ return Child->consume();
+ }
+
+private:
+ llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+ OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
+ return OS;
+ }
+
+ std::unique_ptr<Iterator> Child;
+ size_t Limit;
+ size_t ItemsLeft;
+};
+
} // end namespace
-std::vector<std::pair<DocID, float>> consume(Iterator &It, size_t Limit) {
+std::vector<std::pair<DocID, float>> consume(Iterator &It) {
std::vector<std::pair<DocID, float>> Result;
- for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit;
- It.advance(), ++Retrieved) {
- DocID Document = It.peek();
- Result.push_back(std::make_pair(Document, It.consume(Document)));
- }
+ for (; !It.reachedEnd(); It.advance())
+ Result.push_back(std::make_pair(It.peek(), It.consume()));
return Result;
}
return llvm::make_unique<BoostIterator>(move(Child), Factor);
}
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+ size_t Size) {
+ return llvm::make_unique<LimitIterator>(move(Child), Size);
+}
+
} // namespace dex
} // namespace clangd
} // namespace clang
///
/// Note: reachedEnd() must be false.
virtual DocID peek() const = 0;
- /// Retrieves boosting score. Query tree root should pass Root->peek() to this
- /// function, the parameter is needed to propagate through the tree. Given ID
- /// should be compared against BOOST iterator peek()s: some of the iterators
- /// would not point to the item which was propagated to the top of the query
- /// tree (e.g. if these iterators are branches of OR iterator) and hence
- /// shouldn't apply any boosting to the consumed item.
- virtual float consume(DocID ID) = 0;
+ /// Informs the iterator that the current document was consumed, and returns
+ /// its boost.
+ ///
+ /// Note: If this iterator has any child iterators that contain the document,
+ /// consume() should be called on those and their boosts incorporated.
+ /// consume() must *not* be called on children that don't contain the current
+ /// doc.
+ virtual float consume() = 0;
virtual ~Iterator() {}
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
};
-/// Advances the iterator until it is either exhausted or the number of
-/// requested items is reached. Returns pairs of document IDs with the
-/// corresponding boosting score.
+/// Advances the iterator until it is exhausted. Returns pairs of document IDs
+/// with the corresponding boosting score.
///
/// Boosting can be seen as a compromise between retrieving too many items and
/// calculating finals score for each of them (which might be very expensive)
/// and not retrieving enough items so that items with very high final score
/// would not be processed. Boosting score is a computationally efficient way
/// to acquire preliminary scores of requested items.
-std::vector<std::pair<DocID, float>>
-consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max());
+std::vector<std::pair<DocID, float>> consume(Iterator &It);
/// Returns a document iterator over given PostingList.
///
std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
float Factor);
+/// Returns LIMIT iterator, which yields up to N elements of its child iterator.
+/// Elements only count towards the limit if they are part of the final result
+/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2),
+/// not ().
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+ size_t Limit);
+
/// This allows createAnd(create(...), create(...)) syntax.
template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
std::vector<std::unique_ptr<Iterator>> Children;
namespace dex {
namespace {
-std::vector<DocID>
-consumeIDs(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()) {
- auto IDAndScore = consume(It, Limit);
+std::vector<DocID> consumeIDs(Iterator &It) {
+ auto IDAndScore = consume(It);
std::vector<DocID> IDs(IDAndScore.size());
for (size_t I = 0; I < IDAndScore.size(); ++I)
IDs[I] = IDAndScore[I].first;
Root->advanceTo(1);
Root->advanceTo(0);
EXPECT_EQ(Root->peek(), 1U);
- auto ElementBoost = Root->consume(Root->peek());
+ auto ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 6);
Root->advance();
EXPECT_EQ(Root->peek(), 5U);
Root->advanceTo(5);
EXPECT_EQ(Root->peek(), 5U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 8);
Root->advanceTo(9000);
EXPECT_TRUE(Root->reachedEnd());
}
TEST(DexIndexIterators, Limit) {
- const PostingList L0 = {4, 7, 8, 20, 42, 100};
- const PostingList L1 = {1, 3, 5, 8, 9};
- const PostingList L2 = {1, 5, 7, 9};
- const PostingList L3 = {0, 5};
- const PostingList L4 = {0, 1, 5};
- const PostingList L5;
+ const PostingList L0 = {3, 6, 7, 20, 42, 100};
+ const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
+ const PostingList L2 = {0, 3, 5, 7, 8, 100};
- auto DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100));
+ auto DocIterator = createLimit(create(L0), 42);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
- DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100));
+ DocIterator = createLimit(create(L0), 3);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
- DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8));
+ DocIterator = createLimit(create(L0), 0);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
- DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre());
+ auto AndIterator =
+ createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
+ createLimit(create(L1), 3), createLimit(create(L2), 42));
+ EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
}
TEST(DexIndexIterators, True) {
TEST(DexIndexIterators, Boost) {
auto BoostIterator = createBoost(createTrue(5U), 42U);
EXPECT_FALSE(BoostIterator->reachedEnd());
- auto ElementBoost = BoostIterator->consume(BoostIterator->peek());
+ auto ElementBoost = BoostIterator->consume();
EXPECT_THAT(ElementBoost, 42U);
const PostingList L0 = {2, 4};
auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
createBoost(create(L1), 3U));
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
Root->advance();
EXPECT_THAT(Root->peek(), 1U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 3);
Root->advance();
EXPECT_THAT(Root->peek(), 2U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 2);
Root->advanceTo(4);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 3);
}