From 766a08df12c111b15ed51d0fcac06042d2f68cd6 Mon Sep 17 00:00:00 2001 From: Ben Barham Date: Thu, 15 Jul 2021 18:24:09 -0700 Subject: [PATCH] [Frontend] Only compile modules if not already finalized It was possible to re-add a module to a shared in-memory module cache when search paths are changed. This can eventually cause a crash if the original module is referenced after this occurs. 1. Module A depends on B 2. B exists in two paths C and D 3. First run only has C on the search path, finds A and B and loads them 4. Second run adds D to the front of the search path. A is loaded and contains a reference to the already compiled module from C. But searching finds the module from D instead, causing a mismatch 5. B and the modules that depend on it are considered out of date and thus rebuilt 6. The recompiled module A is added to the in-memory cache, freeing the previously inserted one This can never occur from a regular clang process, but is very easy to do through the API - whether through the use of a shared case or just running multiple compilations from a single `CompilerInstance`. Update the compilation to return early if a module is already finalized so that the pre-condition in the in-memory module cache holds. Resolves rdar://78180255 Differential Revision: https://reviews.llvm.org/D105328 --- clang/include/clang/Basic/DiagnosticCommonKinds.td | 2 + .../clang/Basic/DiagnosticSerializationKinds.td | 3 + clang/include/clang/Serialization/ASTReader.h | 3 + clang/lib/Frontend/CompilerInstance.cpp | 9 ++ clang/lib/Serialization/ASTReader.cpp | 33 ++-- clang/unittests/Serialization/CMakeLists.txt | 2 + clang/unittests/Serialization/ModuleCacheTest.cpp | 179 +++++++++++++++++++++ 7 files changed, 220 insertions(+), 11 deletions(-) create mode 100644 clang/unittests/Serialization/ModuleCacheTest.cpp diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index eea5d8e..4dff337 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -111,6 +111,8 @@ def err_module_cycle : Error<"cyclic dependency in module '%0': %1">, DefaultFatal; def err_module_prebuilt : Error< "error in loading module '%0' from prebuilt module path">, DefaultFatal; +def err_module_rebuild_finalized : Error< + "cannot rebuild module '%0' as it is already finalized">, DefaultFatal; def note_pragma_entered_here : Note<"#pragma entered here">; def note_decl_hiding_tag_type : Note< "%1 %0 is hidden by a non-type declaration of %0 here">; diff --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td index ce48833..bf3221b 100644 --- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td @@ -69,6 +69,9 @@ def err_module_file_not_module : Error< "AST file '%0' was not built as a module">, DefaultFatal; def err_module_file_missing_top_level_submodule : Error< "module file '%0' is missing its top-level submodule">, DefaultFatal; +def note_module_file_conflict : Note< + "this is generally caused by modules with the same name found in multiple " + "paths">; def remark_module_import : Remark< "importing module '%0'%select{| into '%3'}2 from '%1'">, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 6932d9c..4819a5a 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1401,6 +1401,9 @@ private: llvm::iterator_range getModulePreprocessedEntities(ModuleFile &Mod) const; + bool canRecoverFromOutOfDate(StringRef ModuleFileName, + unsigned ClientLoadCapabilities); + public: class ModuleDeclIterator : public llvm::iterator_adaptor_base< diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index a6e2329..c642af1 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1054,6 +1054,15 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, [](CompilerInstance &) {}) { llvm::TimeTraceScope TimeScope("Module Compile", ModuleName); + // Never compile a module that's already finalized - this would cause the + // existing module to be freed, causing crashes if it is later referenced + if (ImportingInstance.getModuleCache().isPCMFinal(ModuleFileName)) { + ImportingInstance.getDiagnostics().Report( + ImportLoc, diag::err_module_rebuild_finalized) + << ModuleName; + return false; + } + // Construct a compiler invocation for creating this module. auto Invocation = std::make_shared(ImportingInstance.getInvocation()); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 98972af..faf6f3d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2764,7 +2764,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, // If requested by the caller and the module hasn't already been read // or compiled, mark modules on error as out-of-date. if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) && - !ModuleMgr.getModuleCache().isPCMFinal(F.FileName)) + canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) return OutOfDate; if (!AllowASTWithCompilerErrors) { @@ -2850,9 +2850,14 @@ ASTReader::ReadControlBlock(ModuleFile &F, StoredSignature, Capabilities); // If we diagnosed a problem, produce a backtrace. - if (isDiagnosedResult(Result, Capabilities)) + bool recompilingFinalized = + Result == OutOfDate && (Capabilities & ARR_OutOfDate) && + getModuleManager().getModuleCache().isPCMFinal(F.FileName); + if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized) Diag(diag::note_module_file_imported_by) << F.FileName << !F.ModuleName.empty() << F.ModuleName; + if (recompilingFinalized) + Diag(diag::note_module_file_conflict); switch (Result) { case Failure: return Failure; @@ -2918,7 +2923,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, F.Kind != MK_ExplicitModule && F.Kind != MK_PrebuiltModule) { auto BuildDir = PP.getFileManager().getDirectory(Blob); if (!BuildDir || *BuildDir != M->Directory) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) Diag(diag::err_imported_module_relocated) << F.ModuleName << Blob << M->Directory->getName(); return OutOfDate; @@ -3928,7 +3933,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation & DisableValidationForModuleKind::Module) && !ModMap) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) { + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) { if (auto ASTFE = M ? M->getASTFile() : None) { // This module was defined by an imported (explicit) module. Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName @@ -3959,7 +3964,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, assert((ImportedBy || F.Kind == MK_ImplicitModule) && "top-level import should be verified"); bool NotImported = F.Kind == MK_ImplicitModule && !ImportedBy; - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) Diag(diag::err_imported_module_modmap_changed) << F.ModuleName << (NotImported ? F.FileName : ImportedBy->FileName) << ModMap->getName() << F.ModuleMapPath << NotImported; @@ -3970,13 +3975,13 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, for (unsigned I = 0, N = Record[Idx++]; I < N; ++I) { // FIXME: we should use input files rather than storing names. std::string Filename = ReadPath(F, Record, Idx); - auto F = FileMgr.getFile(Filename, false, false); - if (!F) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) + auto SF = FileMgr.getFile(Filename, false, false); + if (!SF) { + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) Error("could not find file '" + Filename +"' referenced by AST file"); return OutOfDate; } - AdditionalStoredMaps.insert(*F); + AdditionalStoredMaps.insert(*SF); } // Check any additional module map files (e.g. module.private.modulemap) @@ -3986,7 +3991,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, // Remove files that match // Note: SmallPtrSet::erase is really remove if (!AdditionalStoredMaps.erase(ModMap)) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) Diag(diag::err_module_different_modmap) << F.ModuleName << /*new*/0 << ModMap->getName(); return OutOfDate; @@ -3997,7 +4002,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, // Check any additional module map files that are in the pcm, but not // found in header search. Cases that match are already removed. for (const FileEntry *ModMap : AdditionalStoredMaps) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) + if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) Diag(diag::err_module_different_modmap) << F.ModuleName << /*not new*/1 << ModMap->getName(); return OutOfDate; @@ -5924,6 +5929,12 @@ ASTReader::getModulePreprocessedEntities(ModuleFile &Mod) const { PreprocessingRecord::iterator()); } +bool ASTReader::canRecoverFromOutOfDate(StringRef ModuleFileName, + unsigned int ClientLoadCapabilities) { + return ClientLoadCapabilities & ARR_OutOfDate && + !getModuleManager().getModuleCache().isPCMFinal(ModuleFileName); +} + llvm::iterator_range ASTReader::getModuleFileLevelDecls(ModuleFile &Mod) { return llvm::make_range( diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt index f143f28..c0a7984 100644 --- a/clang/unittests/Serialization/CMakeLists.txt +++ b/clang/unittests/Serialization/CMakeLists.txt @@ -6,12 +6,14 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(SerializationTests InMemoryModuleCacheTest.cpp + ModuleCacheTest.cpp ) clang_target_link_libraries(SerializationTests PRIVATE clangAST clangBasic + clangFrontend clangLex clangSema clangSerialization diff --git a/clang/unittests/Serialization/ModuleCacheTest.cpp b/clang/unittests/Serialization/ModuleCacheTest.cpp new file mode 100644 index 0000000..6cffbc2 --- /dev/null +++ b/clang/unittests/Serialization/ModuleCacheTest.cpp @@ -0,0 +1,179 @@ +//===- unittests/Serialization/ModuleCacheTest.cpp - CI tests -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/FileManager.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Lex/HeaderSearch.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +class ModuleCacheTest : public ::testing::Test { + void SetUp() override { + ASSERT_FALSE(sys::fs::createUniqueDirectory("modulecache-test", TestDir)); + + ModuleCachePath = SmallString<256>(TestDir); + sys::path::append(ModuleCachePath, "mcp"); + ASSERT_FALSE(sys::fs::create_directories(ModuleCachePath)); + } + + void TearDown() override { sys::fs::remove_directories(TestDir); } + +public: + SmallString<256> TestDir; + SmallString<256> ModuleCachePath; + + void addFile(StringRef Path, StringRef Contents) { + ASSERT_FALSE(sys::path::is_absolute(Path)); + + SmallString<256> AbsPath(TestDir); + sys::path::append(AbsPath, Path); + + std::error_code EC; + ASSERT_FALSE( + sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath))); + llvm::raw_fd_ostream OS(AbsPath, EC); + ASSERT_FALSE(EC); + OS << Contents; + } + + void addDuplicateFrameworks() { + addFile("test.m", R"cpp( + @import Top; + )cpp"); + + addFile("frameworks/Top.framework/Headers/top.h", R"cpp( + @import M; + )cpp"); + addFile("frameworks/Top.framework/Modules/module.modulemap", R"cpp( + framework module Top [system] { + header "top.h" + export * + } + )cpp"); + + addFile("frameworks/M.framework/Headers/m.h", R"cpp( + void foo(); + )cpp"); + addFile("frameworks/M.framework/Modules/module.modulemap", R"cpp( + framework module M [system] { + header "m.h" + export * + } + )cpp"); + + addFile("frameworks2/M.framework/Headers/m.h", R"cpp( + void foo(); + )cpp"); + addFile("frameworks2/M.framework/Modules/module.modulemap", R"cpp( + framework module M [system] { + header "m.h" + export * + } + )cpp"); + } +}; + +TEST_F(ModuleCacheTest, CachedModuleNewPath) { + addDuplicateFrameworks(); + + SmallString<256> MCPArg("-fmodules-cache-path="); + MCPArg.append(ModuleCachePath); + IntrusiveRefCntPtr Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions()); + + // First run should pass with no errors + const char *Args[] = {"clang", "-fmodules", "-Fframeworks", + MCPArg.c_str(), "-working-directory", TestDir.c_str(), + "test.m"}; + std::shared_ptr Invocation = + createInvocationFromCommandLine(Args, Diags); + ASSERT_TRUE(Invocation); + CompilerInstance Instance; + Instance.setDiagnostics(Diags.get()); + Instance.setInvocation(Invocation); + SyntaxOnlyAction Action; + ASSERT_TRUE(Instance.ExecuteAction(Action)); + ASSERT_FALSE(Diags->hasErrorOccurred()); + + // Now add `frameworks2` to the search path. `Top.pcm` will have a reference + // to the `M` from `frameworks`, but a search will find the `M` from + // `frameworks2` - causing a mismatch and it to be considered out of date. + // + // Normally this would be fine - `M` and the modules it depends on would be + // rebuilt. However, since we have a shared module cache and thus an already + // finalized `Top`, recompiling `Top` will cause the existing module to be + // removed from the cache, causing possible crashed if it is ever used. + // + // Make sure that an error occurs instead. + const char *Args2[] = {"clang", "-fmodules", "-Fframeworks2", + "-Fframeworks", MCPArg.c_str(), "-working-directory", + TestDir.c_str(), "test.m"}; + std::shared_ptr Invocation2 = + createInvocationFromCommandLine(Args2, Diags); + ASSERT_TRUE(Invocation2); + CompilerInstance Instance2(Instance.getPCHContainerOperations(), + &Instance.getModuleCache()); + Instance2.setDiagnostics(Diags.get()); + Instance2.setInvocation(Invocation2); + SyntaxOnlyAction Action2; + ASSERT_FALSE(Instance2.ExecuteAction(Action2)); + ASSERT_TRUE(Diags->hasErrorOccurred()); +} + +TEST_F(ModuleCacheTest, CachedModuleNewPathAllowErrors) { + addDuplicateFrameworks(); + + SmallString<256> MCPArg("-fmodules-cache-path="); + MCPArg.append(ModuleCachePath); + IntrusiveRefCntPtr Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions()); + + // First run should pass with no errors + const char *Args[] = {"clang", "-fmodules", "-Fframeworks", + MCPArg.c_str(), "-working-directory", TestDir.c_str(), + "test.m"}; + std::shared_ptr Invocation = + createInvocationFromCommandLine(Args, Diags); + ASSERT_TRUE(Invocation); + CompilerInstance Instance; + Instance.setDiagnostics(Diags.get()); + Instance.setInvocation(Invocation); + SyntaxOnlyAction Action; + ASSERT_TRUE(Instance.ExecuteAction(Action)); + ASSERT_FALSE(Diags->hasErrorOccurred()); + + // Same as `CachedModuleNewPath` but while allowing errors. This is a hard + // failure where the module wasn't created, so it should still fail. + const char *Args2[] = { + "clang", "-fmodules", "-Fframeworks2", + "-Fframeworks", MCPArg.c_str(), "-working-directory", + TestDir.c_str(), "-Xclang", "-fallow-pcm-with-compiler-errors", + "test.m"}; + std::shared_ptr Invocation2 = + createInvocationFromCommandLine(Args2, Diags); + ASSERT_TRUE(Invocation2); + CompilerInstance Instance2(Instance.getPCHContainerOperations(), + &Instance.getModuleCache()); + Instance2.setDiagnostics(Diags.get()); + Instance2.setInvocation(Invocation2); + SyntaxOnlyAction Action2; + ASSERT_FALSE(Instance2.ExecuteAction(Action2)); + ASSERT_TRUE(Diags->hasErrorOccurred()); +} + +} // anonymous namespace -- 2.7.4