From: Ben Langmuir Date: Mon, 14 Nov 2022 22:51:54 +0000 (-0800) Subject: [clang][deps] Avoid leaking modulemap paths across unrelated imports X-Git-Tag: upstream/17.0.6~27568 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=05ec16d90deb747414c8534f98617d25d90bb714;p=platform%2Fupstream%2Fllvm.git [clang][deps] Avoid leaking modulemap paths across unrelated imports Use a FileEntryRef when retrieving modulemap paths in the scanner so that we use a path compatible with the original module import, rather than a FileEntry which can allow unrelated modules to leak paths into how we build a module due to FileManager mutating the path. Note: the current change prevents an "unrelated" path, but does not change how VFS mapped paths are handled (which would be calling getNameAsRequested) nor canonicalize the path. Differential Revision: https://reviews.llvm.org/D137989 --- diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index da8b9a9..23c67fc 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2338,8 +2338,7 @@ public: /// Visit all the top-level module maps loaded when building the given module /// file. void visitTopLevelModuleMaps(serialization::ModuleFile &MF, - llvm::function_ref< - void(const FileEntry *)> Visitor); + llvm::function_ref Visitor); bool isProcessingUpdateRecords() { return ProcessingUpdateRecords; } }; diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h index 5062c4c..1cc6208 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -237,7 +237,7 @@ private: llvm::function_ref Optimize) const; /// Collect module map files for given modules. - llvm::StringSet<> + llvm::DenseSet collectModuleMapFiles(ArrayRef ClangModuleDeps) const; /// Add module map files to the invocation, if needed. diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 45f04ea..76ef0cb 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -638,11 +638,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, if (&MF != &PrimaryModule) CI.getFrontendOpts().ModuleFiles.push_back(MF.FileName); - ASTReader->visitTopLevelModuleMaps( - PrimaryModule, [&](const FileEntry *FE) { - CI.getFrontendOpts().ModuleMapFiles.push_back( - std::string(FE->getName())); - }); + ASTReader->visitTopLevelModuleMaps(PrimaryModule, [&](FileEntryRef FE) { + CI.getFrontendOpts().ModuleMapFiles.push_back( + std::string(FE.getName())); + }); } // Set up the input file for replay purposes. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 05e6c6e..cf37162 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9164,14 +9164,14 @@ void ASTReader::visitInputFiles(serialization::ModuleFile &MF, void ASTReader::visitTopLevelModuleMaps( serialization::ModuleFile &MF, - llvm::function_ref Visitor) { + llvm::function_ref Visitor) { unsigned NumInputs = MF.InputFilesLoaded.size(); for (unsigned I = 0; I < NumInputs; ++I) { InputFileInfo IFI = readInputFileInfo(MF, I + 1); if (IFI.TopLevelModuleMap) // FIXME: This unnecessarily re-reads the InputFileInfo. if (auto FE = getInputFile(MF, I + 1).getFile()) - Visitor(FE); + Visitor(*FE); } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1dcb542..cdd65a4 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1486,12 +1486,14 @@ namespace { /// An input file. struct InputFileEntry { - const FileEntry *File; + FileEntryRef File; bool IsSystemFile; bool IsTransient; bool BufferOverridden; bool IsTopLevelModuleMap; uint32_t ContentHash[2]; + + InputFileEntry(FileEntryRef File) : File(File) {} }; } // namespace @@ -1541,8 +1543,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, if (!IsSLocAffecting[I]) continue; - InputFileEntry Entry; - Entry.File = Cache->OrigEntry; + InputFileEntry Entry(*Cache->OrigEntry); Entry.IsSystemFile = isSystem(File.getFileCharacteristic()); Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; @@ -1557,9 +1558,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, if (MemBuff) ContentHash = hash_value(MemBuff->getBuffer()); else - // FIXME: The path should be taken from the FileEntryRef. PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content) - << Entry.File->getName(); + << Entry.File.getName(); } auto CH = llvm::APInt(64, ContentHash); Entry.ContentHash[0] = @@ -1599,14 +1599,13 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, RecordData::value_type Record[] = { INPUT_FILE, InputFileOffsets.size(), - (uint64_t)Entry.File->getSize(), + (uint64_t)Entry.File.getSize(), (uint64_t)getTimestampForOutput(Entry.File), Entry.BufferOverridden, Entry.IsTransient, Entry.IsTopLevelModuleMap}; - // FIXME: The path should be taken from the FileEntryRef. - EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName()); + EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getName()); } // Emit content hash for this file. diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index f0fed7d..93720f7 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -130,22 +130,22 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs( auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps); for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) { + // TODO: Track these as `FileEntryRef` to simplify the equality check below. + auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile); + assert(ModuleMapEntry && "module map file entry not found"); + // Don't report module maps describing eagerly-loaded dependency. This // information will be deserialized from the PCM. // TODO: Verify this works fine when modulemap for module A is eagerly // loaded from A.pcm, and module map passed on the command line contains // definition of a submodule: "explicit module A.Private { ... }". - if (EagerLoadModules && DepModuleMapFiles.contains(ModuleMapFile)) + if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry)) continue; - // TODO: Track these as `FileEntryRef` to simplify the equality check below. - auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile); - assert(ModuleMapEntry && "module map file entry not found"); - // Don't report module map file of the current module unless it also // describes a dependency (for symmetry). if (*ModuleMapEntry == *CurrentModuleMapEntry && - !DepModuleMapFiles.contains(ModuleMapFile)) + !DepModuleMapFiles.contains(*ModuleMapEntry)) continue; CI.getFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile); @@ -181,13 +181,16 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs( return CI; } -llvm::StringSet<> ModuleDepCollector::collectModuleMapFiles( +llvm::DenseSet ModuleDepCollector::collectModuleMapFiles( ArrayRef ClangModuleDeps) const { - llvm::StringSet<> ModuleMapFiles; + llvm::DenseSet ModuleMapFiles; for (const ModuleID &MID : ClangModuleDeps) { ModuleDeps *MD = ModuleDepsByID.lookup(MID); assert(MD && "Inconsistent dependency info"); - ModuleMapFiles.insert(MD->ClangModuleMapFile); + // TODO: Track ClangModuleMapFile as `FileEntryRef`. + auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile); + assert(FE && "Missing module map file that was previously found"); + ModuleMapFiles.insert(*FE); } return ModuleMapFiles; } @@ -447,10 +450,10 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { addAllAffectingClangModules(M, MD, SeenDeps); MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( - *MF, [&](const FileEntry *FE) { - if (FE->getName().endswith("__inferred_module.map")) + *MF, [&](FileEntryRef FE) { + if (FE.getName().endswith("__inferred_module.map")) return; - MD.ModuleMapFileDeps.emplace_back(FE->getName()); + MD.ModuleMapFileDeps.emplace_back(FE.getName()); }); CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs( diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c new file mode 100644 index 0000000..5dde770 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c @@ -0,0 +1,94 @@ +// Check that the path of an imported modulemap file is not influenced by +// modules outside that module's dependency graph. Specifically, the "Foo" +// module below does not transitively import Mod via a symlink, so it should not +// see the symlinked path. + +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json +// RUN: ln -s module %t/include/symlink-to-module + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \ +// RUN: -format experimental-full -mode=preprocess-dependency-directives \ +// RUN: -optimize-args -module-files-dir %t/build > %t/deps.json + +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: "modules": [ +// CHECK: { +// CHECK: "command-line": [ +// CHECK: "-fmodule-map-file=[[PREFIX]]/include/module/module.modulemap" +// CHECK: ] +// CHECK: "name": "Foo" +// CHECK: } +// CHECK: { +// CHECK: "command-line": [ +// FIXME: canonicalize this path +// CHECK: "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap" +// CHECK: ] +// CHECK: "name": "Foo_Private" +// CHECK: } +// CHECK: { +// CHECK: "command-line": [ +// CHECK: "[[PREFIX]]/include/module/module.modulemap" +// CHECK: ] +// CHECK: "name": "Mod" +// CHECK: } +// CHECK: { +// CHECK: "command-line": [ +// FIXME: canonicalize this path +// CHECK: "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap" +// CHECK: ] +// CHECK: "name": "Other" +// CHECK: } +// CHECK: { +// CHECK: "command-line": [ +// FIXME: canonicalize this path +// CHECK: "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap" +// CHECK: ] +// CHECK: "name": "Test" +// CHECK: } +// CHECK: ] + +//--- cdb.json.in +[{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/test.c -F DIR/Frameworks -I DIR/include -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/module-cache", + "file": "DIR/test.c" +}] + +//--- include/module/module.modulemap +module Mod { header "mod.h" export * } + +//--- include/module/mod.h + +//--- include/module.modulemap +module Other { header "other.h" export * } + +//--- include/other.h +#include "symlink-to-module/mod.h" +#include "module/mod.h" + +//--- Frameworks/Foo.framework/Modules/module.modulemap +framework module Foo { header "Foo.h" export * } +//--- Frameworks/Foo.framework/Modules/module.private.modulemap +framework module Foo_Private { header "Priv.h" export * } + +//--- Frameworks/Foo.framework/Headers/Foo.h +#include "module/mod.h" + +//--- Frameworks/Foo.framework/PrivateHeaders/Priv.h +#include +#include "other.h" + +//--- module.modulemap +module Test { header "test.h" export * } + +//--- test.h +#include +#include + +//--- test.c +#include "test.h"