From b672638dbc7cec9a51826520e8f8aef276bac3ee Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 7 Apr 2022 17:05:54 +0200 Subject: [PATCH] [clang][deps] Ensure deterministic filename case The dependency scanner can reuse a single FileManager instance across multiple translation units. This may lead to non-deterministic output depending on which TU gets processed first. One of the problems is that Clang uses DirectoryEntry::getName in the header search algorithm. This function returns the path that was first used to construct the (shared) entry in FileManager. Using DirectoryEntryRef::getName instead preserves the case as it was spelled out for the current "get directory entry" request. rdar://90647508 Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D123229 --- clang/include/clang/Lex/DirectoryLookup.h | 4 ++ clang/lib/Lex/HeaderSearch.cpp | 4 +- .../ClangScanDeps/header-search-case-sensitivity.c | 50 ++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 clang/test/ClangScanDeps/header-search-case-sensitivity.c diff --git a/clang/include/clang/Lex/DirectoryLookup.h b/clang/include/clang/Lex/DirectoryLookup.h index da2ae9f..3602662 100644 --- a/clang/include/clang/Lex/DirectoryLookup.h +++ b/clang/include/clang/Lex/DirectoryLookup.h @@ -91,6 +91,10 @@ public: return isNormalDir() ? &u.Dir.getDirEntry() : nullptr; } + Optional<DirectoryEntryRef> getDirRef() const { + return isNormalDir() ? Optional<DirectoryEntryRef>(u.Dir) : None; + } + /// getFrameworkDir - Return the directory that this framework refers to. /// const DirectoryEntry *getFrameworkDir() const { diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index d16b9a5..400a6c8 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -436,10 +436,10 @@ Optional DirectoryLookup::LookupFile( SmallString<1024> TmpDir; if (isNormalDir()) { // Concatenate the requested file onto the directory. 
- TmpDir = getDir()->getName(); + TmpDir = getDirRef()->getName(); llvm::sys::path::append(TmpDir, Filename); if (SearchPath) { - StringRef SearchPathRef(getDir()->getName()); + StringRef SearchPathRef(getDirRef()->getName()); SearchPath->clear(); SearchPath->append(SearchPathRef.begin(), SearchPathRef.end()); } diff --git a/clang/test/ClangScanDeps/header-search-case-sensitivity.c b/clang/test/ClangScanDeps/header-search-case-sensitivity.c new file mode 100644 index 0000000..e8d2415 --- /dev/null +++ b/clang/test/ClangScanDeps/header-search-case-sensitivity.c @@ -0,0 +1,50 @@ +// This test checks that reusing FileManager produces deterministic results on case-insensitive filesystems. + +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- dir1/arm/lower.h +//--- dir2/ARM/upper.h +//--- t1.c +#include "upper.h" +//--- t2.c +#include "arm/lower.h" + +//--- cdb.json.template +[{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/t1.c -I DIR/dir2/ARM -I DIR/dir1", + "file": "DIR/t1.c" +},{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/t2.c -I DIR/dir2 -I DIR/dir1", + "file": "DIR/t2.c" +}] + +//--- cdb-rev.json.template +[{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/t2.c -I DIR/dir2 -I DIR/dir1", + "file": "DIR/t2.c" +},{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/t1.c -I DIR/dir2/ARM -I DIR/dir1", + "file": "DIR/t1.c" +}] + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb-rev.json.template > %t/cdb-rev.json + +// RUN: clang-scan-deps -compilation-database=%t/cdb.json -format make -j 1 | sed 's:\\\\\?:/:g' | FileCheck %s + +// In the reversed case, Clang starts by scanning "t2.c". When looking up the "arm/lower.h" header, +// the string is appended to "DIR/dir2". That file ("DIR/dir2/arm/lower.h") doesn't exist, but when +// learning so, the FileManager stats and caches the parent directory ("DIR/dir2/arm"), using the +// UID as the key. 
+// When scanning "t1.c" later on, the "DIR/dir2/ARM" search directory is assigned the **same** +// directory entry (with lowercase "arm"), since they share the UID on case-insensitive filesystems. +// To preserve the correct case throughout the compiler for any file within that directory, it's +// important to use the spelling actually used, not just the cached one. +// RUN: clang-scan-deps -compilation-database=%t/cdb-rev.json -format make -j 1 | sed 's:\\\\\?:/:g' | FileCheck %s + +// CHECK: ARM/upper.h -- 2.7.4