Move the isSelfContainedHeader function from clangd to libtooling.
author Haojian Wu <hokein.wu@gmail.com>
Mon, 7 Nov 2022 12:30:47 +0000 (13:30 +0100)
committer Haojian Wu <hokein.wu@gmail.com>
Mon, 14 Nov 2022 08:40:45 +0000 (09:40 +0100)
We plan to reuse it in the include-cleaner library. This patch moves
this functionality from clangd to libtooling so that this piece of code can be
shared among all clang tools.

Differential Revision: https://reviews.llvm.org/D137697

clang-tools-extra/clangd/Headers.cpp
clang-tools-extra/clangd/SourceCode.cpp
clang-tools-extra/clangd/SourceCode.h
clang-tools-extra/clangd/index/SymbolCollector.cpp
clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h [new file with mode: 0644]
clang/lib/Tooling/Inclusions/CMakeLists.txt
clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp [new file with mode: 0644]
clang/unittests/Tooling/CMakeLists.txt
clang/unittests/Tooling/HeaderAnalysisTest.cpp [new file with mode: 0644]

index cbfeb63..f276f5b 100644 (file)
@@ -15,6 +15,7 @@
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Path.h"
 #include <cstring>
@@ -121,12 +122,10 @@ public:
         // isSelfContainedHeader only returns true once the full header-guard
         // structure has been seen, i.e. when exiting the *outer* copy of the
         // file. So last result wins.
-        if (isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo))
-          Out->NonSelfContained.erase(
-              *Out->getID(SM.getFileEntryForID(PrevFID)));
+        if (tooling::isSelfContainedHeader(FE, SM, HeaderInfo))
+          Out->NonSelfContained.erase(*Out->getID(FE));
         else
-          Out->NonSelfContained.insert(
-              *Out->getID(SM.getFileEntryForID(PrevFID)));
+          Out->NonSelfContained.insert(*Out->getID(FE));
       }
       break;
     }
index 5928541..5913db5 100644 (file)
@@ -1183,58 +1183,5 @@ bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
   return SM.getBufferData(FID).startswith(ProtoHeaderComment);
 }
 
-namespace {
-
-// Is Line an #if or #ifdef directive?
-// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non
-// self-contained and is probably not what we want.
-bool isIf(llvm::StringRef Line) {
-  Line = Line.ltrim();
-  if (!Line.consume_front("#"))
-    return false;
-  Line = Line.ltrim();
-  return Line.startswith("if");
-}
-
-// Is Line an #error directive mentioning includes?
-bool isErrorAboutInclude(llvm::StringRef Line) {
-  Line = Line.ltrim();
-  if (!Line.consume_front("#"))
-    return false;
-  Line = Line.ltrim();
-  if (!Line.startswith("error"))
-    return false;
-  return Line.contains_insensitive(
-      "includ"); // Matches "include" or "including".
-}
-
-// Heuristically headers that only want to be included via an umbrella.
-bool isDontIncludeMeHeader(llvm::StringRef Content) {
-  llvm::StringRef Line;
-  // Only sniff up to 100 lines or 10KB.
-  Content = Content.take_front(100 * 100);
-  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
-    std::tie(Line, Content) = Content.split('\n');
-    if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
-      return true;
-  }
-  return false;
-}
-
-} // namespace
-
-bool isSelfContainedHeader(const FileEntry *FE, FileID FID,
-                           const SourceManager &SM, HeaderSearch &HeaderInfo) {
-  // FIXME: Should files that have been #import'd be considered
-  // self-contained? That's really a property of the includer,
-  // not of the file.
-  if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) &&
-      !HeaderInfo.hasFileBeenImported(FE))
-    return false;
-  // This pattern indicates that a header can't be used without
-  // particular preprocessor state, usually set up by another header.
-  return !isDontIncludeMeHeader(SM.getBufferData(FID));
-}
-
 } // namespace clangd
 } // namespace clang
index faed27d..70d1ebe 100644 (file)
@@ -325,11 +325,6 @@ bool isHeaderFile(llvm::StringRef FileName,
 /// Returns true if the given location is in a generated protobuf file.
 bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
 
-/// This scans source code, and should not be called when using a preamble.
-/// Prefer to access the cache in IncludeStructure::isSelfContained if you can.
-bool isSelfContainedHeader(const FileEntry *FE, FileID ID,
-                           const SourceManager &SM, HeaderSearch &HeaderInfo);
-
 /// Returns true if Name is reserved, like _Foo or __Vector_base.
 inline bool isReservedName(llvm::StringRef Name) {
   // This doesn't catch all cases, but the most common.
index ee948f8..a943746 100644 (file)
@@ -28,6 +28,7 @@
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/Token.h"
+#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/FileSystem.h"
@@ -419,8 +420,8 @@ private:
                 getFrameworkHeaderIncludeSpelling(FE, HFI->Framework, HS))
           return *Spelling;
 
-    if (!isSelfContainedHeader(FE, FID, PP->getSourceManager(),
-                               PP->getHeaderSearchInfo())) {
+    if (!tooling::isSelfContainedHeader(FE, PP->getSourceManager(),
+                                        PP->getHeaderSearchInfo())) {
       // A .inc or .def file is often included into a real header to define
       // symbols (e.g. LLVM tablegen files).
       if (Filename.endswith(".inc") || Filename.endswith(".def"))
diff --git a/clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h b/clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h
new file mode 100644 (file)
index 0000000..b0b4b14
--- /dev/null
@@ -0,0 +1,33 @@
+//===--- HeaderAnalysis.h -----------------------------------------*-C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H
+#define LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H
+
+namespace clang {
+class FileEntry;
+class SourceManager;
+class HeaderSearch;
+
+namespace tooling {
+
+/// Returns true if the given physical file is a self-contained header.
+///
+/// A header is considered self-contained if
+///   - it has a proper header guard or has been #imported
+///   - *and* it doesn't have a dont-include-me pattern.
+///
+/// \p FE must be non-null. This function can be expensive as it may scan the
+/// source code to detect the dont-include-me pattern heuristically.
+bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM,
+                           HeaderSearch &HeaderInfo);
+
+} // namespace tooling
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H
index 1954d16..78b25f4 100644 (file)
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangToolingInclusions
+  HeaderAnalysis.cpp
   HeaderIncludes.cpp
   IncludeStyle.cpp
 
diff --git a/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp b/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp
new file mode 100644 (file)
index 0000000..78b866e
--- /dev/null
@@ -0,0 +1,67 @@
+//===--- HeaderAnalysis.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/HeaderSearch.h"
+
+namespace clang::tooling {
+namespace {
+
+// Does Line begin a conditional directive, i.e. '#' followed by "if..."?
+// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non
+// self-contained and is probably not what we want.
+bool isIf(llvm::StringRef Line) {
+  llvm::StringRef Rest = Line.ltrim();
+  // A directive opens with '#'; whitespace may follow before its name.
+  if (Rest.consume_front("#"))
+    return Rest.ltrim().startswith("if");
+  return false;
+}
+
+// Does Line hold an #error directive whose text mentions includes?
+bool isErrorAboutInclude(llvm::StringRef Line) {
+  llvm::StringRef Rest = Line.ltrim();
+  if (!Rest.consume_front("#"))
+    return false;
+  Rest = Rest.ltrim();
+  // "includ" covers both "include" and "including", in any letter case.
+  bool IsError = Rest.startswith("error");
+  bool MentionsInclude = Rest.contains_insensitive("includ");
+  return IsError && MentionsInclude;
+}
+
+// Heuristic: detects headers that only want to be included via an umbrella.
+bool isDontIncludeMeHeader(llvm::MemoryBufferRef Buffer) {
+  // Limit the scan to the first 10KB of the buffer and at most 100 lines.
+  StringRef Rest = Buffer.getBuffer().take_front(100 * 100);
+  for (unsigned Seen = 0; Seen != 100 && !Rest.empty(); ++Seen) {
+    auto [Current, Tail] = Rest.split('\n');
+    Rest = Tail;
+    // An #if directly followed by an include-related #error is the pattern.
+    if (isIf(Current) && isErrorAboutInclude(Rest.split('\n').first))
+      return true;
+  }
+  return false;
+}
+
+} // namespace
+
+bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM,
+                           HeaderSearch &HeaderInfo) {
+  assert(FE);
+  if (!(HeaderInfo.isFileMultipleIncludeGuarded(FE) ||
+        HeaderInfo.hasFileBeenImported(FE)))
+    return false;
+  // A dont-include-me pattern marks a header that needs preprocessor state set
+  // up by another header. A missing buffer falls back to a default buffer ref.
+  auto Buffer =
+      const_cast<SourceManager &>(SM).getMemoryBufferForFileOrNone(FE);
+  return !isDontIncludeMeHeader(Buffer.value_or(llvm::MemoryBufferRef()));
+}
+} // namespace clang::tooling
index 424932e..ce9f556 100644 (file)
@@ -16,6 +16,7 @@ add_clang_unittest(ToolingTests
   DiagnosticsYamlTest.cpp
   ExecutionTest.cpp
   FixItTest.cpp
+  HeaderAnalysisTest.cpp
   HeaderIncludesTest.cpp
   StandardLibraryTest.cpp
   LexicallyOrderedRecursiveASTVisitorTest.cpp
diff --git a/clang/unittests/Tooling/HeaderAnalysisTest.cpp b/clang/unittests/Tooling/HeaderAnalysisTest.cpp
new file mode 100644 (file)
index 0000000..1a121e7
--- /dev/null
@@ -0,0 +1,66 @@
+//===- unittest/Tooling/HeaderAnalysisTest.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Testing/TestAST.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace tooling {
+namespace {
+
+TEST(HeaderAnalysisTest, IsSelfContained) {
+  TestInputs Inputs;
+  Inputs.Code = R"cpp(
+  #include "headerguard.h"
+  #include "pragmaonce.h"
+  #import "imported.h"
+
+  #include "bad.h"
+  #include "unguarded.h"
+  )cpp";
+  // Headers expected to be reported as self-contained.
+  Inputs.ExtraFiles["headerguard.h"] = R"cpp(
+  #ifndef HEADER_H
+  #define HEADER_H
+
+  #endif // HEADER_H
+  )cpp";
+  Inputs.ExtraFiles["pragmaonce.h"] = R"cpp(
+  #pragma once
+  )cpp";
+  Inputs.ExtraFiles["imported.h"] = "";
+  // Headers expected to be rejected: no guard, or a dont-include-me pattern.
+  Inputs.ExtraFiles["unguarded.h"] = "";
+  Inputs.ExtraFiles["bad.h"] = R"cpp(
+  #pragma once
+
+  #if defined(INSIDE_H)
+  #error "Only ... can be included directly"
+  #endif
+  )cpp";
+
+  TestAST AST(Inputs);
+  const auto &SM = AST.sourceManager();
+  auto &FM = SM.getFileManager();
+  auto &HI = AST.preprocessor().getHeaderSearchInfo();
+  auto getFileEntry = [&](llvm::StringRef FileName) {
+    return FM.getFile(FileName).get();
+  };
+  EXPECT_TRUE(isSelfContainedHeader(getFileEntry("headerguard.h"), SM, HI));
+  EXPECT_TRUE(isSelfContainedHeader(getFileEntry("pragmaonce.h"), SM, HI));
+  EXPECT_TRUE(isSelfContainedHeader(getFileEntry("imported.h"), SM, HI));
+
+  EXPECT_FALSE(isSelfContainedHeader(getFileEntry("unguarded.h"), SM, HI));
+  EXPECT_FALSE(isSelfContainedHeader(getFileEntry("bad.h"), SM, HI));
+}
+
+} // namespace
+} // namespace tooling
+} // namespace clang