/// or a header-name for C++20 header units.
std::string ModuleName;
- /// The context hash of a module represents the set of compiler options that
- /// may make one version of a module incompatible with another. This includes
- /// things like language mode, predefined macros, header search paths, etc...
+ /// The context hash of a module represents the compiler options that affect
+ /// the resulting command-line invocation.
+ ///
+ /// Modules with the same name and ContextHash but different invocations could
+ /// cause non-deterministic build results.
///
/// Modules with the same name but a different \c ContextHash should be
/// treated as separate modules for the purpose of a build.
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/StringSaver.h"
using namespace clang;
CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
+ // Remove any macro definitions that are explicitly ignored.
+ if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
+ llvm::erase_if(
+ CI.getPreprocessorOpts().Macros,
+ [&CI](const std::pair<std::string, bool> &Def) {
+ StringRef MacroDef = Def.first;
+ return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
+ llvm::CachedHashString(MacroDef.split('=').first));
+ });
+ // Remove the now unused option.
+ CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
+ }
+
// Report the prebuilt modules this module uses.
for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
return serializeCompilerInvocation(CI);
}
+static std::string getModuleContextHash(const ModuleDeps &MD) {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<16>,
+ llvm::support::endianness::native>
+ HashBuilder;
+ SmallString<32> Scratch;
+
+ // Hash the compiler version and serialization version to ensure the module
+ // will be readable.
+ HashBuilder.add(getClangFullRepositoryVersion());
+ HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
+
+ // Hash the BuildInvocation without any input files.
+ SmallVector<const char *, 32> DummyArgs;
+ MD.BuildInvocation.generateCC1CommandLine(DummyArgs, [&](const Twine &Arg) {
+ Scratch.clear();
+ StringRef Str = Arg.toStringRef(Scratch);
+ HashBuilder.add(Str);
+ return "<unused>";
+ });
+
+ // Hash the input file paths and module dependencies. These paths may differ
+ // even if the invocation is identical if they depend on the contents of the
+ // files in the TU -- for example, case-insensitive paths to modulemap files.
+ // Usually such a case would indicate a missed optimization to canonicalize,
+ // but it may be difficult to canonicalize all cases when there is a VFS.
+ HashBuilder.add(MD.ClangModuleMapFile);
+ for (const auto &Dep : MD.PrebuiltModuleDeps)
+ HashBuilder.add(Dep.PCMFile);
+ for (const auto &ID : MD.ClangModuleDeps) {
+ HashBuilder.add(ID.ModuleName);
+ HashBuilder.add(ID.ContextHash);
+ }
+
+ // Hash options that affect which callbacks are made for outputs.
+ HashBuilder.add(MD.HadDependencyFile);
+ HashBuilder.add(MD.HadSerializedDiagnostics);
+
+ llvm::BLAKE3Result<16> Hash = HashBuilder.final();
+ std::array<uint64_t, 2> Words;
+ static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
+ std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
+ return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
+}
+
std::vector<std::string>
ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
return serializeCompilerInvocation(BuildInvocation);
.DiagnosticSerializationFile.empty();
MD.HadDependencyFile =
!MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
- // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
- // the context hash since it can affect the command-line.
- MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
llvm::DenseSet<const Module *> AddedModules;
addAllSubmoduleDeps(M, MD, AddedModules);
+ // Do this last since it requires the dependencies.
+ MD.ID.ContextHash = getModuleContextHash(MD);
return MD.ID;
}
--- /dev/null
+// Ensure '-DFOO -fmodules-ignore-macro=FOO' and '' both produce the same
+// canonical module build command.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN: -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK: {
+// CHECK: "command-line": [
+// CHECK-NOT: "FOO"
+// CHECK-NOT: "-fmodules-ignore-macro
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH_NO_FOO:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "command-line": [
+// CHECK: "-D"
+// CHECK-NEXT: "FOO"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH_FOO:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-DFOO"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_FOO]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK: "-DFOO"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu2.c"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK: "-DFOO"
+// CHECK: "-fmodules-ignore-macro=FOO"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu3.c"
+
+//--- cdb.json.template
+[
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu1.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -DFOO -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu2.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -DFOO -fmodules-ignore-macro=FOO -fsyntax-only DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu3.c"
+ },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"
+
+//--- tu3.c
+#include "Mod.h"
--- /dev/null
+// Ensure the path to the modulemap input is included in the module context hash
+// irrespective of other TU command-line arguments, as it effects the canonical
+// module build command. In this test we use the difference in spelling between
+// module.modulemap and module.map, but it also applies to situations such as
+// differences in case-insensitive paths if they are not canonicalized away.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN: -format experimental-full > %t/deps.json
+
+// RUN: mv %t/module.modulemap %t/module.map
+// RUN: echo 'AFTER_MOVE' >> %t/deps.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN: -format experimental-full >> %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK: {
+// CHECK: "command-line": [
+// CHECK: "{{.*}}module.modulemap"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH1:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH1]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-LABEL: AFTER_MOVE
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK: {
+// CHECK-NOT: [[HASH1]]
+// CHECK: "command-line": [
+// CHECK: "{{.*}}module.map"
+// CHECK: ]
+// CHECK-NOT: [[HASH1]]
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash":
+// CHECK-NOT: [[HASH1]]
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+
+//--- cdb.json.template
+[
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu.c"
+ }
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu.c
+#include "Mod.h"
--- /dev/null
+// If secondary output files such as .d are enabled, ensure it affects the
+// module context hash since it may impact the resulting module build commands.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN: -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK: {
+// CHECK: "command-line": [
+// CHECK: "-dependency-file"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH1:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "command-line": [
+// CHECK-NOT: "-dependency-file"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH2:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH1]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK: "-MF"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH2]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-MF"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu2.c"
+
+//--- cdb.json.template
+[
+ {
+ "directory": "DIR",
+ "command": "clang -MD -MF DIR/tu1.d -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu1.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu2.c"
+ },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"
--- /dev/null
+// Differences in -W warning options should result in different canonical module
+// build commands.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN: -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK: {
+// CHECK: "command-line": [
+// CHECK: "-Wall"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH1:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "command-line": [
+// CHECK-NOT: "-Wall"
+// CHECK: ]
+// CHECK: "context-hash": "[[HASH2:.*]]"
+// CHECK: "name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "translation-units": [
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH1]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK: "-Wall"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT: }
+// CHECK-NEXT: {
+// CHECK: "clang-module-deps": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "context-hash": "[[HASH2]]"
+// CHECK-NEXT: "module-name": "Mod"
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-Wall"
+// CHECK: ]
+// CHECK: "input-file": "{{.*}}tu2.c"
+
+//--- cdb.json.template
+[
+ {
+ "directory": "DIR",
+ "command": "clang -Wall -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu1.c"
+ },
+ {
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+ "file": "DIR/tu2.c"
+ },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"