[clangd] Use ML Code completion ranking as default.
authorUtkarsh Saxena <usx@google.com>
Mon, 1 Feb 2021 15:49:38 +0000 (16:49 +0100)
committerUtkarsh Saxena <usx@google.com>
Tue, 2 Mar 2021 09:05:37 +0000 (10:05 +0100)
This makes code completion use a Decision Forest based ranking algorithm to rank
completion candidates. [Esitmated 6% accuracy boost]. This was
previously hidden behind the flag --ranking-model=decision_forest. This
patch makes it the default ranking algorithm.

Note: this is a generic model, not specialized for any particular
project. clangd does not collect or upload data to train code completion.

Also treat Keywords separately as they are not recorded by the training set generator.

Differential Revision: https://reviews.llvm.org/D96353

clang-tools-extra/clangd/CodeComplete.h
clang-tools-extra/clangd/Quality.cpp
clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp

index debf71d..40a528c 100644 (file)
@@ -133,7 +133,7 @@ struct CodeCompleteOptions {
   enum CodeCompletionRankingModel {
     Heuristics,
     DecisionForest,
-  } RankingModel = Heuristics;
+  } RankingModel = DecisionForest;
 
   /// Callback used to score a CompletionCandidate if DecisionForest ranking
   /// model is enabled.
index b49392b..9942100 100644 (file)
@@ -580,12 +580,16 @@ evaluateDecisionForest(const SymbolQualitySignals &Quality,
   // multiplciative boost (like NameMatch). This allows us to weigh the
   // prediciton score and NameMatch appropriately.
   Scores.ExcludingName = pow(Base, Evaluate(E));
-  // NeedsFixIts is not part of the DecisionForest as generating training
-  // data that needs fixits is not-feasible.
+  // Following cases are not part of the generated training dataset:
+  //  - Symbols with `NeedsFixIts`.
+  //  - Forbidden symbols.
+  //  - Keywords: Dataset contains only macros and decls.
   if (Relevance.NeedsFixIts)
     Scores.ExcludingName *= 0.5;
   if (Relevance.Forbidden)
     Scores.ExcludingName *= 0;
+  if (Quality.Category == SymbolQualitySignals::Keyword)
+    Scores.ExcludingName *= 4;
 
   // NameMatch should be a multiplier on total score to support rescoring.
   Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
index b7a4017..0ff1e83 100644 (file)
@@ -647,13 +647,13 @@ TEST(CompletionTest, ScopedWithFilter) {
 }
 
 TEST(CompletionTest, ReferencesAffectRanking) {
-  auto Results = completions("int main() { abs^ }", {ns("absl"), func("absb")});
-  EXPECT_THAT(Results.Completions,
-              HasSubsequence(Named("absb"), Named("absl")));
-  Results = completions("int main() { abs^ }",
-                        {withReferences(10000, ns("absl")), func("absb")});
-  EXPECT_THAT(Results.Completions,
-              HasSubsequence(Named("absl"), Named("absb")));
+  EXPECT_THAT(completions("int main() { abs^ }", {func("absA"), func("absB")})
+                  .Completions,
+              HasSubsequence(Named("absA"), Named("absB")));
+  EXPECT_THAT(completions("int main() { abs^ }",
+                          {func("absA"), withReferences(1000, func("absB"))})
+                  .Completions,
+              HasSubsequence(Named("absB"), Named("absA")));
 }
 
 TEST(CompletionTest, ContextWords) {