The occurrences are roots for finding used headers, like walkAST.
Includes are the targets we're matching used headers against.
Differential Revision: https://reviews.llvm.org/D136723
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/FileSystem/UniqueID.h"
+#include "clang-include-cleaner/Types.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
#include <memory>
#include <vector>
class CompilerInstance;
class Decl;
class FileEntry;
+class Preprocessor;
+class PPCallbacks;
namespace include_cleaner {
// FIXME: add selfcontained file.
};
-// Contains recorded parser events relevant to include-cleaner.
+/// Recorded main-file parser events relevant to include-cleaner.
struct RecordedAST {
- // The consumer (when installed into clang) tracks declarations in this.
+ /// The consumer (when installed into clang) tracks declarations in `*this`.
std::unique_ptr<ASTConsumer> record();
ASTContext *Ctx = nullptr;
- // The set of declarations written at file scope inside the main file.
- //
- // These are the roots of the subtrees that should be traversed to find uses.
- // (Traversing the TranslationUnitDecl would find uses inside headers!)
+ /// The set of declarations written at file scope inside the main file.
+ ///
+ /// These are the roots of the subtrees that should be traversed to find uses.
+ /// (Traversing the TranslationUnitDecl would find uses inside headers!)
std::vector<Decl *> Roots;
};
+/// Recorded main-file preprocessor events relevant to include-cleaner.
+///
+/// This doesn't include facts that we record globally for the whole TU, even
+/// when they occur in the main file (e.g. IWYU pragmas).
+struct RecordedPP {
+ /// The callback (when installed into clang) tracks macros/includes in this.
+ std::unique_ptr<PPCallbacks> record(const Preprocessor &PP);
+
+ /// Describes where macros were used in the main file.
+ std::vector<SymbolReference> MacroReferences;
+
+ /// A container for all includes present in the main file.
+ /// Supports efficiently hit-testing Headers against Includes.
+ /// FIXME: is there a more natural header for this class?
+ class RecordedIncludes {
+ public:
+ void add(const Include &);
+
+ /// All #includes seen, in the order they appear.
+ llvm::ArrayRef<Include> all() const { return All; }
+
+ /// Determine #includes that match a header (that provides a used symbol).
+ ///
+ /// Matching is based on the type of Header specified:
+ /// - for a physical file like /path/to/foo.h, we check Resolved
+ /// - for a logical file like <vector>, we check Spelled
+ llvm::SmallVector<const Include *> match(Header H) const;
+
+ private:
+ std::vector<Include> All;
+ // Lookup structures for match(), values are index into All.
+ llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
+ llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
+ } Includes;
+};
+
} // namespace include_cleaner
} // namespace clang
#ifndef CLANG_INCLUDE_CLEANER_TYPES_H
#define CLANG_INCLUDE_CLEANER_TYPES_H
+#include "clang/Basic/SourceLocation.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include <memory>
#include <vector>
namespace clang {
class Decl;
class FileEntry;
+class IdentifierInfo;
namespace include_cleaner {
+/// We consider a macro to be a different symbol each time it is defined.
+struct Macro {
+ IdentifierInfo *Name;
+ /// The location of the Name where the macro is defined.
+ SourceLocation Definition;
+
+ bool operator==(const Macro &S) const {
+ return Definition == S.Definition;
+ }
+};
+
/// An entity that can be referenced in the code.
struct Symbol {
enum Kind {
/// A canonical clang declaration.
Declaration,
+ /// A preprocessor macro, as defined in a specific location.
+ Macro,
/// A recognized symbol from the standard library, like std::string.
Standard,
};
Symbol(const Decl &D) : Storage(&D) {}
+ Symbol(struct Macro M) : Storage(M) {}
Symbol(tooling::stdlib::Symbol S) : Storage(S) {}
Kind kind() const { return static_cast<Kind>(Storage.index()); }
bool operator==(const Symbol &RHS) const { return Storage == RHS.Storage; }
+ const Decl &declaration() const { return *std::get<Declaration>(Storage); }
+ struct Macro macro() const { return std::get<Macro>(Storage); }
tooling::stdlib::Symbol standard() const {
return std::get<Standard>(Storage);
}
- const Decl &declaration() const { return *std::get<Declaration>(Storage); }
private:
// FIXME: Add support for macros.
// Order must match Kind enum!
- std::variant<const Decl *, tooling::stdlib::Symbol> Storage;
+ std::variant<const Decl *, struct Macro, tooling::stdlib::Symbol> Storage;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &);
+/// Indicates that a piece of code refers to a symbol.
+struct SymbolReference {
+ /// The symbol referred to.
+ Symbol Symbol;
+ /// The point in the code that refers to the symbol.
+ SourceLocation RefLocation;
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolReference &);
+
/// Represents a file that provides some symbol. Might not be includeable, e.g.
/// built-in or main-file itself.
struct Header {
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Header &);
+/// A single #include directive written in the main file.
+struct Include {
+ llvm::StringRef Spelled; // e.g. vector
+ const FileEntry *Resolved = nullptr; // e.g. /path/to/c++/v1/vector
+ // nullptr if the header was not found
+ SourceLocation HashLocation; // of hash in #include <vector>
+ unsigned Line = 0; // 1-based line number for #include
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &);
+
} // namespace include_cleaner
} // namespace clang
#endif
-
#include "clang/AST/DeclGroup.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
namespace clang::include_cleaner {
+namespace {
+
+class PPRecorder : public PPCallbacks {
+public:
+ PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
+ : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {}
+
+ void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType,
+ FileID PrevFID) override {
+ Active = SM.isWrittenInMainFile(Loc);
+ }
+
+ void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
+ StringRef SpelledFilename, bool IsAngled,
+ CharSourceRange FilenameRange,
+ llvm::Optional<FileEntryRef> File,
+ StringRef SearchPath, StringRef RelativePath,
+ const Module *, SrcMgr::CharacteristicKind) override {
+ if (!Active)
+ return;
+
+ Include I;
+ I.HashLocation = Hash;
+ I.Resolved = File ? &File->getFileEntry() : nullptr;
+ I.Line = SM.getSpellingLineNumber(Hash);
+ I.Spelled = SpelledFilename;
+ Recorded.Includes.add(I);
+ }
+
+ void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
+ SourceRange Range, const MacroArgs *Args) override {
+ if (!Active)
+ return;
+ recordMacroRef(MacroName, *MD.getMacroInfo());
+ }
+
+ void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
+ if (!Active)
+ return;
+
+ const auto *MI = MD->getMacroInfo();
+ // The tokens of a macro definition could refer to a macro.
+ // Formally this reference isn't resolved until this macro is expanded,
+ // but we want to treat it as a reference anyway.
+ for (const auto &Tok : MI->tokens()) {
+ auto *II = Tok.getIdentifierInfo();
+ // Could this token be a reference to a macro? (Not param to this macro).
+ if (!II || !II->hadMacroDefinition() ||
+ llvm::is_contained(MI->params(), II))
+ continue;
+ if (const MacroInfo *MI = PP.getMacroInfo(II))
+ recordMacroRef(Tok, *MI);
+ }
+ }
+
+ void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
+ const MacroDirective *) override {
+ if (!Active)
+ return;
+ if (const auto *MI = MD.getMacroInfo())
+ recordMacroRef(MacroName, *MI);
+ }
+
+private:
+ void recordMacroRef(const Token &Tok, const MacroInfo &MI) {
+ if (MI.isBuiltinMacro())
+ return; // __FILE__ is not a reference.
+ Recorded.MacroReferences.push_back(
+ SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
+ Tok.getLocation()});
+ }
+
+ bool Active = false;
+ RecordedPP &Recorded;
+ const Preprocessor &PP;
+ const SourceManager &SM;
+};
+
+} // namespace
// FIXME: this is a mirror of clang::clangd::parseIWYUPragma, move to libTooling
// to share the code?
return std::make_unique<Recorder>(this);
}
+void RecordedPP::RecordedIncludes::add(const Include &I) {
+ unsigned Index = All.size();
+ All.push_back(I);
+ auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first;
+ All.back().Spelled = BySpellingIt->first(); // Now we own the backing string.
+
+ BySpellingIt->second.push_back(Index);
+ if (I.Resolved)
+ ByFile[I.Resolved].push_back(Index);
+}
+
+llvm::SmallVector<const Include *>
+RecordedPP::RecordedIncludes::match(Header H) const {
+ llvm::SmallVector<const Include *> Result;
+ switch (H.kind()) {
+ case Header::Physical:
+ for (unsigned I : ByFile.lookup(H.physical()))
+ Result.push_back(&All[I]);
+ break;
+ case Header::Standard:
+ for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>")))
+ Result.push_back(&All[I]);
+ break;
+ }
+ return Result;
+}
+
+std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
+ return std::make_unique<PPRecorder>(*this, PP);
+}
+
+
} // namespace clang::include_cleaner
#include "clang-include-cleaner/Types.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/FileEntry.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
namespace clang::include_cleaner {
if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
return OS << ND->getNameAsString();
return OS << S.declaration().getDeclKindName();
+ case Symbol::Macro:
+ return OS << S.macro().Name;
case Symbol::Standard:
return OS << S.standard().scope() << S.standard().name();
}
llvm_unreachable("Unhandled Header kind");
}
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Include &I) {
+ return OS << I.Line << ": " << I.Spelled << " => "
+ << (I.Resolved ? I.Resolved->getName() : "<missing>");
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolReference &R) {
+ // We can't decode the Location without SourceManager. Its raw representation
+ // isn't completely useless (and distinguishes SymbolReference from Symbol).
+ return OS << R.Symbol << "@0x"
+ << llvm::utohexstr(R.RefLocation.getRawEncoding(),
+ /*Width=*/CHAR_BIT *
+ sizeof(SourceLocation::UIntTy));
+}
+
} // namespace clang::include_cleaner
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Testing/TestAST.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Annotations.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang::include_cleaner {
namespace {
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::IsEmpty;
// Matches a Decl* if it is a NamedDecl with the given name.
-MATCHER_P(Named, N, "") {
+MATCHER_P(named, N, "") {
if (const NamedDecl *ND = llvm::dyn_cast<NamedDecl>(arg)) {
if (N == ND->getNameAsString())
return true;
}
)cpp";
auto AST = build();
- EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("ns")));
+ EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("ns")));
}
// Decl in included file is not a root.
void mainFunc();
)cpp";
auto AST = build();
- EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("mainFunc")));
+ EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("mainFunc")));
}
// Decl from macro expanded into the main file is a root.
X
)cpp";
auto AST = build();
- EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("x")));
+ EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("x")));
+}
+
+class RecordPPTest : public ::testing::Test {
+protected:
+ TestInputs Inputs;
+ RecordedPP Recorded;
+
+ RecordPPTest() {
+ struct RecordAction : public PreprocessOnlyAction {
+ RecordedPP &Out;
+ RecordAction(RecordedPP &Out) : Out(Out) {}
+
+ void ExecuteAction() override {
+ auto &PP = getCompilerInstance().getPreprocessor();
+ PP.addPPCallbacks(Out.record(PP));
+ PreprocessOnlyAction::ExecuteAction();
+ }
+ };
+ Inputs.MakeAction = [this] {
+ return std::make_unique<RecordAction>(Recorded);
+ };
+ }
+
+ TestAST build() { return TestAST(Inputs); }
+};
+
+// Matches an Include with a particular spelling.
+MATCHER_P(spelled, S, "") { return arg.Spelled == S; }
+
+TEST_F(RecordPPTest, CapturesIncludes) {
+ llvm::Annotations MainFile(R"cpp(
+ $H^#include "./header.h"
+ $M^#include "missing.h"
+ )cpp");
+ Inputs.Code = MainFile.code();
+ Inputs.ExtraFiles["header.h"] = "";
+ Inputs.ErrorOK = true; // missing header
+ auto AST = build();
+
+ ASSERT_THAT(
+ Recorded.Includes.all(),
+ testing::ElementsAre(spelled("./header.h"), spelled("missing.h")));
+
+ auto &H = Recorded.Includes.all().front();
+ EXPECT_EQ(H.Line, 2u);
+ EXPECT_EQ(H.HashLocation,
+ AST.sourceManager().getComposedLoc(
+ AST.sourceManager().getMainFileID(), MainFile.point("H")));
+ EXPECT_EQ(H.Resolved, AST.fileManager().getFile("header.h").get());
+
+ auto &M = Recorded.Includes.all().back();
+ EXPECT_EQ(M.Line, 3u);
+ EXPECT_EQ(M.HashLocation,
+ AST.sourceManager().getComposedLoc(
+ AST.sourceManager().getMainFileID(), MainFile.point("M")));
+ EXPECT_EQ(M.Resolved, nullptr);
+}
+
+TEST_F(RecordPPTest, CapturesMacroRefs) {
+ llvm::Annotations Header(R"cpp(
+ #define $def^X 1
+
+ // Refs, but not in main file.
+ #define Y X
+ int one = X;
+ )cpp");
+ llvm::Annotations MainFile(R"cpp(
+ #define EARLY X // not a ref, no definition
+ #include "header.h"
+ #define LATE ^X
+ #define LATE2 ^X // a ref even if not expanded
+
+ int uno = ^X;
+ int jeden = $exp^LATE; // a ref in LATE's expansion
+
+ #define IDENT(X) X // not a ref, shadowed
+ int eins = IDENT(^X);
+
+ #undef ^X
+ // Not refs, rather a new macro with the same name.
+ #define X 2
+ int two = X;
+ )cpp");
+ Inputs.Code = MainFile.code();
+ Inputs.ExtraFiles["header.h"] = Header.code();
+ auto AST = build();
+ const auto &SM = AST.sourceManager();
+
+ SourceLocation Def = SM.getComposedLoc(
+ SM.translateFile(AST.fileManager().getFile("header.h").get()),
+ Header.point("def"));
+ ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty()));
+ Symbol OrigX = Recorded.MacroReferences.front().Symbol;
+ EXPECT_EQ("X", OrigX.macro().Name->getName());
+ EXPECT_EQ(Def, OrigX.macro().Definition);
+
+ std::vector<unsigned> RefOffsets;
+ std::vector<unsigned> ExpOffsets; // Expansion locs of refs in macro locs.
+ std::vector<SourceLocation> RefMacroLocs;
+ for (const auto &Ref : Recorded.MacroReferences) {
+ if (Ref.Symbol == OrigX) {
+ auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation);
+ if (FID == SM.getMainFileID()) {
+ RefOffsets.push_back(Off);
+ } else if (Ref.RefLocation.isMacroID() &&
+ SM.isWrittenInMainFile(SM.getExpansionLoc(Ref.RefLocation))) {
+ ExpOffsets.push_back(
+ SM.getDecomposedExpansionLoc(Ref.RefLocation).second);
+ } else {
+ ADD_FAILURE() << Ref.RefLocation.printToString(SM);
+ }
+ }
+ }
+ EXPECT_THAT(RefOffsets, ElementsAreArray(MainFile.points()));
+ EXPECT_THAT(ExpOffsets, ElementsAreArray(MainFile.points("exp")));
+}
+
+// Matches an Include* on the specified line;
+MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; }
+
+TEST(RecordedIncludesTest, Match) {
+ // We're using synthetic data, but need a FileManager to obtain FileEntry*s.
+ // Ensure it doesn't do any actual IO.
+ auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ FileManager FM(FileSystemOptions{});
+ const FileEntry *A = FM.getVirtualFile("/path/a", /*Size=*/0, time_t{});
+ const FileEntry *B = FM.getVirtualFile("/path/b", /*Size=*/0, time_t{});
+
+ RecordedPP::RecordedIncludes Includes;
+ Includes.add(Include{"a", A, SourceLocation(), 1});
+ Includes.add(Include{"a2", A, SourceLocation(), 2});
+ Includes.add(Include{"b", B, SourceLocation(), 3});
+ Includes.add(Include{"vector", B, SourceLocation(), 4});
+ Includes.add(Include{"vector", B, SourceLocation(), 5});
+ Includes.add(Include{"missing", nullptr, SourceLocation(), 6});
+
+ EXPECT_THAT(Includes.match(A), ElementsAre(line(1), line(2)));
+ EXPECT_THAT(Includes.match(B), ElementsAre(line(3), line(4), line(5)));
+ EXPECT_THAT(Includes.match(*tooling::stdlib::Header::named("<vector>")),
+ ElementsAre(line(4), line(5)));
}
class PragmaIncludeTest : public ::testing::Test {