[clangd] IncludeCleaner: Add support for IWYU pragma private
authorKirill Bobyrev <kbobyrev@google.com>
Thu, 31 Mar 2022 10:49:38 +0000 (12:49 +0200)
committerKirill Bobyrev <kbobyrev@google.com>
Thu, 31 Mar 2022 10:49:52 +0000 (12:49 +0200)
Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D120306

clang-tools-extra/clangd/IncludeCleaner.cpp
clang-tools-extra/clangd/IncludeCleaner.h
clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

index 04dbf12..14cc340 100644 (file)
@@ -12,6 +12,7 @@
 #include "ParsedAST.h"
 #include "Protocol.h"
 #include "SourceCode.h"
+#include "index/CanonicalIncludes.h"
 #include "support/Logger.h"
 #include "support/Trace.h"
 #include "clang/AST/ASTContext.h"
@@ -23,6 +24,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 
@@ -303,9 +305,10 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) {
                                  &AST.getTokens());
 }
 
-ReferencedFiles
-findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM,
-                    llvm::function_ref<FileID(FileID)> HeaderResponsible) {
+ReferencedFiles findReferencedFiles(
+    const ReferencedLocations &Locs, const SourceManager &SM,
+    llvm::function_ref<FileID(FileID)> HeaderResponsible,
+    llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader) {
   std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
   llvm::sort(Sorted); // Group by FileID.
   ReferencedFilesBuilder Builder(SM);
@@ -324,33 +327,55 @@ findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM,
   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
   llvm::DenseSet<FileID> UserFiles;
-  for (FileID ID : Builder.Files)
+  llvm::StringSet<> PublicHeaders;
+  for (FileID ID : Builder.Files) {
     UserFiles.insert(HeaderResponsible(ID));
+    if (auto PublicHeader = UmbrellaHeader(ID)) {
+      PublicHeaders.insert(*PublicHeader);
+    }
+  }
 
   llvm::DenseSet<tooling::stdlib::Header> StdlibFiles;
   for (const auto &Symbol : Locs.Stdlib)
     for (const auto &Header : Symbol.headers())
       StdlibFiles.insert(Header);
 
-  return {std::move(UserFiles), std::move(StdlibFiles)};
+  return {std::move(UserFiles), std::move(StdlibFiles),
+          std::move(PublicHeaders)};
 }
 
 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
                                     const IncludeStructure &Includes,
+                                    const CanonicalIncludes &CanonIncludes,
                                     const SourceManager &SM) {
-  return findReferencedFiles(Locs, SM, [&SM, &Includes](FileID ID) {
-    return headerResponsible(ID, SM, Includes);
-  });
+  return findReferencedFiles(
+      Locs, SM,
+      [&SM, &Includes](FileID ID) {
+        return headerResponsible(ID, SM, Includes);
+      },
+      [&SM, &CanonIncludes](FileID ID) -> Optional<StringRef> {
+        const FileEntry *Entry = SM.getFileEntryForID(ID);
+        if (Entry) {
+          auto PublicHeader = CanonIncludes.mapHeader(Entry->getName());
+          if (!PublicHeader.empty()) {
+            return PublicHeader;
+          }
+        }
+        return llvm::None;
+      });
 }
 
 std::vector<const Inclusion *>
 getUnused(ParsedAST &AST,
-          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
+          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
+          const llvm::StringSet<> &ReferencedPublicHeaders) {
   trace::Span Tracer("IncludeCleaner::getUnused");
   std::vector<const Inclusion *> Unused;
   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
     if (!MFI.HeaderID)
       continue;
+    if (ReferencedPublicHeaders.contains(MFI.Written))
+      continue;
     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
     bool Used = ReferencedFiles.contains(IncludeID);
     if (!Used && !mayConsiderUnused(MFI, AST)) {
@@ -400,11 +425,12 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
   const auto &SM = AST.getSourceManager();
 
   auto Refs = findReferencedLocations(AST);
-  auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
-                                               AST.getSourceManager());
+  auto ReferencedFiles =
+      findReferencedFiles(Refs, AST.getIncludeStructure(),
+                          AST.getCanonicalIncludes(), AST.getSourceManager());
   auto ReferencedHeaders =
-      translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
-  return getUnused(AST, ReferencedHeaders);
+      translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM);
+  return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas);
 }
 
 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
index 183f84f..4ce31ba 100644 (file)
@@ -13,9 +13,6 @@
 /// to provide useful warnings in most popular scenarios but not 1:1 exact
 /// feature compatibility.
 ///
-/// FIXME(kirillbobyrev): Add support for IWYU pragmas.
-/// FIXME(kirillbobyrev): Add support for standard library headers.
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDECLEANER_H
 
 #include "Headers.h"
 #include "ParsedAST.h"
+#include "index/CanonicalIncludes.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include <vector>
 
 namespace clang {
@@ -58,6 +57,11 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST);
 struct ReferencedFiles {
   llvm::DenseSet<FileID> User;
   llvm::DenseSet<tooling::stdlib::Header> Stdlib;
+  /// Files responsible for the symbols referenced in the main file and defined
+  /// in private headers (private headers have IWYU pragma: private, include
+  /// "public.h"). We store spelling of the public header (with quotes or angle
+  /// brackets) files here to avoid dealing with full filenames and visibility.
+  llvm::StringSet<> SpelledUmbrellas;
 };
 
 /// Retrieves IDs of all files containing SourceLocations from \p Locs.
@@ -66,11 +70,16 @@ struct ReferencedFiles {
 /// \p HeaderResponsible returns the public header that should be included given
 /// symbols from a file with the given FileID (example: public headers should be
 /// preferred to non self-contained and private headers).
-ReferencedFiles
-findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM,
-                    llvm::function_ref<FileID(FileID)> HeaderResponsible);
+/// \p UmbrellaHeader returns the public public header is responsible for
+/// providing symbols from a file with the given FileID (example: MyType.h
+/// should be included instead of MyType_impl.h).
+ReferencedFiles findReferencedFiles(
+    const ReferencedLocations &Locs, const SourceManager &SM,
+    llvm::function_ref<FileID(FileID)> HeaderResponsible,
+    llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader);
 ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
                                     const IncludeStructure &Includes,
+                                    const CanonicalIncludes &CanonIncludes,
                                     const SourceManager &SM);
 
 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
@@ -83,7 +92,8 @@ translateToHeaderIDs(const ReferencedFiles &Files,
 /// In unclear cases, headers are not marked as unused.
 std::vector<const Inclusion *>
 getUnused(ParsedAST &AST,
-          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles);
+          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
+          const llvm::StringSet<> &ReferencedPublicHeaders);
 
 std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST);
 
index b6bf769..ffecea5 100644 (file)
@@ -9,6 +9,7 @@
 #include "Annotations.h"
 #include "IncludeCleaner.h"
 #include "SourceCode.h"
+#include "TestFS.h"
 #include "TestTU.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
@@ -280,8 +281,9 @@ TEST(IncludeCleaner, Stdlib) {
 
     ReferencedLocations Locs = findReferencedLocations(AST);
     EXPECT_THAT(Locs.Stdlib, ElementsAreArray(WantSyms));
-    ReferencedFiles Files = findReferencedFiles(Locs, AST.getIncludeStructure(),
-                                                AST.getSourceManager());
+    ReferencedFiles Files =
+        findReferencedFiles(Locs, AST.getIncludeStructure(),
+                            AST.getCanonicalIncludes(), AST.getSourceManager());
     EXPECT_THAT(Files.Stdlib, ElementsAreArray(WantHeaders));
   }
 }
@@ -392,8 +394,8 @@ TEST(IncludeCleaner, VirtualBuffers) {
   auto &SM = AST.getSourceManager();
   auto &Includes = AST.getIncludeStructure();
 
-  auto ReferencedFiles =
-      findReferencedFiles(findReferencedLocations(AST), Includes, SM);
+  auto ReferencedFiles = findReferencedFiles(
+      findReferencedLocations(AST), Includes, AST.getCanonicalIncludes(), SM);
   llvm::StringSet<> ReferencedFileNames;
   for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
@@ -412,7 +414,9 @@ TEST(IncludeCleaner, VirtualBuffers) {
   EXPECT_THAT(ReferencedHeaderNames, ElementsAre(testPath("macros.h")));
 
   // Sanity check.
-  EXPECT_THAT(getUnused(AST, ReferencedHeaders), IsEmpty());
+  EXPECT_THAT(
+      getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas),
+      IsEmpty());
 }
 
 TEST(IncludeCleaner, DistinctUnguardedInclusions) {
@@ -441,9 +445,9 @@ TEST(IncludeCleaner, DistinctUnguardedInclusions) {
 
   ParsedAST AST = TU.build();
 
-  auto ReferencedFiles =
-      findReferencedFiles(findReferencedLocations(AST),
-                          AST.getIncludeStructure(), AST.getSourceManager());
+  auto ReferencedFiles = findReferencedFiles(
+      findReferencedLocations(AST), AST.getIncludeStructure(),
+      AST.getCanonicalIncludes(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
   for (FileID FID : ReferencedFiles.User)
@@ -475,9 +479,9 @@ TEST(IncludeCleaner, NonSelfContainedHeaders) {
 
   ParsedAST AST = TU.build();
 
-  auto ReferencedFiles =
-      findReferencedFiles(findReferencedLocations(AST),
-                          AST.getIncludeStructure(), AST.getSourceManager());
+  auto ReferencedFiles = findReferencedFiles(
+      findReferencedLocations(AST), AST.getIncludeStructure(),
+      AST.getCanonicalIncludes(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
   for (FileID FID : ReferencedFiles.User)
@@ -493,15 +497,31 @@ TEST(IncludeCleaner, IWYUPragmas) {
   TestTU TU;
   TU.Code = R"cpp(
     #include "behind_keep.h" // IWYU pragma: keep
+    #include "public.h"
+
+    void bar() { foo(); }
     )cpp";
   TU.AdditionalFiles["behind_keep.h"] = guard("");
+  TU.AdditionalFiles["public.h"] = guard("#include \"private.h\"");
+  TU.AdditionalFiles["private.h"] = guard(R"cpp(
+    // IWYU pragma: private, include "public.h"
+    void foo() {}
+  )cpp");
   ParsedAST AST = TU.build();
 
-  auto ReferencedFiles =
-      findReferencedFiles(findReferencedLocations(AST),
-                          AST.getIncludeStructure(), AST.getSourceManager());
-  EXPECT_TRUE(ReferencedFiles.User.empty());
+  auto ReferencedFiles = findReferencedFiles(
+      findReferencedLocations(AST), AST.getIncludeStructure(),
+      AST.getCanonicalIncludes(), AST.getSourceManager());
+  EXPECT_EQ(ReferencedFiles.SpelledUmbrellas.size(), 1u);
+  EXPECT_EQ(ReferencedFiles.SpelledUmbrellas.begin()->getKey(), "\"public.h\"");
+  EXPECT_EQ(ReferencedFiles.User.size(), 2u);
+  EXPECT_TRUE(
+      ReferencedFiles.User.contains(AST.getSourceManager().getMainFileID()));
+  EXPECT_TRUE(
+      ReferencedFiles.User.contains(AST.getSourceManager().getMainFileID()));
   EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty()));
+  auto Unused = computeUnusedIncludes(AST);
+  EXPECT_THAT(Unused, IsEmpty());
 }
 
 } // namespace