Reland "Add sanitizer metadata attributes to clang IR gen."
authorMitch Phillips <31459023+hctim@users.noreply.github.com>
Mon, 13 Jun 2022 19:12:31 +0000 (12:12 -0700)
committerMitch Phillips <31459023+hctim@users.noreply.github.com>
Mon, 13 Jun 2022 19:23:27 +0000 (12:23 -0700)
RE-LAND (reverts a revert):
This reverts commit 8e1f47b596b28fbc88cf32e8f46eb2fecb145fb2.

This patch adds generation of sanitizer metadata attributes (which were
added in D126100) to the clang frontend.

We still currently generate the llvm.asan.globals that's consumed by
the IR pass, but the plan is to eventually migrate off of that onto
purely debuginfo and these IR attributes.

Reviewed By: vitalybuka, kstoimenov

Differential Revision: https://reviews.llvm.org/D126929

clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/CodeGen/SanitizerMetadata.cpp
clang/lib/CodeGen/SanitizerMetadata.h
clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt [new file with mode: 0644]
clang/test/CodeGen/asan-globals.cpp
clang/test/CodeGen/sanitize-init-order.cpp
clang/test/CodeGen/sanitizer-special-case-list-globals.c [new file with mode: 0644]

index 95d1a4c..05738c0 100644 (file)
@@ -2760,21 +2760,14 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn,
   return false;
 }
 
-bool CodeGenModule::isInNoSanitizeList(llvm::GlobalVariable *GV,
+bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind,
+                                       llvm::GlobalVariable *GV,
                                        SourceLocation Loc, QualType Ty,
                                        StringRef Category) const {
-  // For now globals can be ignored only in ASan and KASan.
-  const SanitizerMask EnabledAsanMask =
-      LangOpts.Sanitize.Mask &
-      (SanitizerKind::Address | SanitizerKind::KernelAddress |
-       SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
-       SanitizerKind::MemTag);
-  if (!EnabledAsanMask)
-    return false;
   const auto &NoSanitizeL = getContext().getNoSanitizeList();
-  if (NoSanitizeL.containsGlobal(EnabledAsanMask, GV->getName(), Category))
+  if (NoSanitizeL.containsGlobal(Kind, GV->getName(), Category))
     return true;
-  if (NoSanitizeL.containsLocation(EnabledAsanMask, Loc, Category))
+  if (NoSanitizeL.containsLocation(Kind, Loc, Category))
     return true;
   // Check global type.
   if (!Ty.isNull()) {
@@ -2786,7 +2779,7 @@ bool CodeGenModule::isInNoSanitizeList(llvm::GlobalVariable *GV,
     // Only record types (classes, structs etc.) are ignored.
     if (Ty->isRecordType()) {
       std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy());
-      if (NoSanitizeL.containsType(EnabledAsanMask, TypeStr, Category))
+      if (NoSanitizeL.containsType(Kind, TypeStr, Category))
         return true;
     }
   }
index 5097ef0..779d94a 100644 (file)
@@ -1314,8 +1314,9 @@ public:
   bool isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn,
                           SourceLocation Loc) const;
 
-  bool isInNoSanitizeList(llvm::GlobalVariable *GV, SourceLocation Loc,
-                          QualType Ty, StringRef Category = StringRef()) const;
+  bool isInNoSanitizeList(SanitizerMask Kind, llvm::GlobalVariable *GV,
+                          SourceLocation Loc, QualType Ty,
+                          StringRef Category = StringRef()) const;
 
   /// Imbue XRay attributes to a function, applying the always/never attribute
   /// lists in the process. Returns true if we did imbue attributes this way,
index 8127e15..c3e23c3 100644 (file)
@@ -22,19 +22,66 @@ using namespace CodeGen;
 
 SanitizerMetadata::SanitizerMetadata(CodeGenModule &CGM) : CGM(CGM) {}
 
+// TODO(hctim): Can be removed when we migrate off of llvm.asan.globals. This
+// prevents llvm.asan.globals from being emitted for
+// __attribute__((disable_sanitizer_instrumentation)) and uses of
+// -fsanitize-ignorelist when a sanitizer isn't enabled.
 static bool isAsanHwasanOrMemTag(const SanitizerSet &SS) {
   return SS.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
-                     SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
-                     SanitizerKind::MemTag);
+                     SanitizerKind::HWAddress | SanitizerKind::MemTag);
+}
+
+SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
+  if (Mask & (SanitizerKind::Address | SanitizerKind::KernelAddress))
+    Mask |= SanitizerKind::Address | SanitizerKind::KernelAddress;
+  // Note: KHWASan doesn't support globals.
+  return Mask;
 }
 
 void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
                                      SourceLocation Loc, StringRef Name,
-                                     QualType Ty, bool IsDynInit,
-                                     bool IsExcluded) {
-  IsDynInit &= !CGM.isInNoSanitizeList(GV, Loc, Ty, "init");
-  IsExcluded |= CGM.isInNoSanitizeList(GV, Loc, Ty);
+                                     QualType Ty,
+                                     SanitizerMask NoSanitizeAttrMask,
+                                     bool IsDynInit) {
+  SanitizerSet FsanitizeArgument = CGM.getLangOpts().Sanitize;
+  if (!isAsanHwasanOrMemTag(FsanitizeArgument))
+    return;
+
+  FsanitizeArgument.Mask = expandKernelSanitizerMasks(FsanitizeArgument.Mask);
+  NoSanitizeAttrMask = expandKernelSanitizerMasks(NoSanitizeAttrMask);
+  SanitizerSet NoSanitizeAttrSet = {NoSanitizeAttrMask &
+                                    FsanitizeArgument.Mask};
+
+  llvm::GlobalVariable::SanitizerMetadata Meta;
+  if (GV->hasSanitizerMetadata())
+    Meta = GV->getSanitizerMetadata();
+
+  Meta.NoAddress |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::Address);
+  Meta.NoAddress |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::Address, GV, Loc, Ty);
+
+  Meta.NoHWAddress |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::HWAddress);
+  Meta.NoHWAddress |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
+
+  Meta.NoMemtag |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+  Meta.NoMemtag |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
 
+  if (FsanitizeArgument.has(SanitizerKind::Address)) {
+    // TODO(hctim): Make this conditional when we migrate off llvm.asan.globals.
+    IsDynInit &= !CGM.isInNoSanitizeList(SanitizerKind::Address |
+                                             SanitizerKind::KernelAddress,
+                                         GV, Loc, Ty, "init");
+    Meta.IsDynInit = IsDynInit;
+  }
+
+  bool IsExcluded = Meta.NoAddress || Meta.NoHWAddress || Meta.NoMemtag;
+
+  GV->setSanitizerMetadata(Meta);
+
+  // TODO(hctim): Code below can be removed when we migrate off of
+  // llvm.asan.globals onto the new metadata attributes.
   llvm::Metadata *LocDescr = nullptr;
   llvm::Metadata *GlobalName = nullptr;
   llvm::LLVMContext &VMContext = CGM.getLLVMContext();
@@ -77,23 +124,13 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
 
     return NoSanitizeMask;
   };
-  reportGlobal(GV, D.getLocation(), OS.str(), D.getType(), IsDynInit,
-               SanitizerSet{getNoSanitizeMask(D)}.has(SanitizerKind::Address));
-}
 
-void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
-                                     SourceLocation Loc, StringRef Name,
-                                     QualType Ty, bool IsDynInit) {
-  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
-    return;
-  reportGlobal(GV, Loc, Name, Ty, IsDynInit, false);
+  reportGlobal(GV, D.getLocation(), OS.str(), D.getType(), getNoSanitizeMask(D),
+               IsDynInit);
 }
 
 void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
-  // For now, just make sure the global is not modified by the ASan
-  // instrumentation.
-  if (isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
-    reportGlobal(GV, SourceLocation(), "", QualType(), false, true);
+  reportGlobal(GV, SourceLocation(), "", QualType(), SanitizerKind::All);
 }
 
 void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) {
index a2290fa..41e1dca 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/Sanitizers.h"
 #include "clang/Basic/SourceLocation.h"
 
 namespace llvm {
@@ -40,7 +41,9 @@ public:
   void reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
                     bool IsDynInit = false);
   void reportGlobal(llvm::GlobalVariable *GV, SourceLocation Loc,
-                    StringRef Name, QualType Ty = {}, bool IsDynInit = false);
+                    StringRef Name, QualType Ty = {},
+                    SanitizerMask NoSanitizeAttrMask = {},
+                    bool IsDynInit = false);
   void disableSanitizerForGlobal(llvm::GlobalVariable *GV);
   void disableSanitizerForInstruction(llvm::Instruction *I);
 
diff --git a/clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt b/clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt
new file mode 100644 (file)
index 0000000..81eaa53
--- /dev/null
@@ -0,0 +1,9 @@
+global:always_ignored
+[address]
+global:asan_ignored
+[hwaddress]
+global:hwasan_ignored
+[memtag]
+global:memtag_ignored
+[cfi-vcall|cfi-icall]
+src:*
index 0b17038..80194f7 100644 (file)
@@ -23,6 +23,9 @@ void func() {
   const char *literal = "Hello, world!";
 }
 
+// ASAN: @{{.*}}dyn_init_global{{.*}} ={{.*}} global {{.*}}, sanitize_address_dyninit
+// KASAN: @{{.*}}dyn_init_global{{.*}} ={{.*}} global {{.*}}, sanitize_address_dyninit
+
 // ASAN: sectioned_global{{.*}} global { i32, [28 x i8] }{{.*}}, align 32
 // KASAN: sectioned_global{{.*}} global i32
 // ASAN: @__special_global{{.*}} global { i32, [28 x i8] }{{.*}}, align 32
index f39d056..f3cc2c3 100644 (file)
@@ -36,12 +36,29 @@ const volatile PODWithCtor array[5][5];
 
 // Check that ASan init-order checking ignores structs with trivial default
 // constructor.
+
+// CHECK: @s1 ={{.*}} global
+// CHECK-NOT: sanitize_address_dyninit
+// CHECK: @s2 ={{.*}} global
+// CHECK-NOT: sanitize_address_dyninit
+// CHECK: @s3 ={{.*}} global {{.*}}, sanitize_address_dyninit
+// CHECK: @{{.*}}array{{.*}} ={{.*}} global {{.*}}, sanitize_address_dyninit
+
 // CHECK: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]+]], ![[GLOB_4:[0-9]+]]
 // CHECK: ![[GLOB_1]] = !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
 // CHECK: ![[GLOB_2]] = !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
 // CHECK: ![[GLOB_3]] = !{%struct.PODWithCtorAndDtor* {{.*}}, i1 true, i1 false}
 // CHECK: ![[GLOB_4]] = !{{{.*}}class.NS::PODWithCtor{{.*}}, i1 true, i1 false}
 
+// IGNORELIST: @s1 ={{.*}} global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @s2 ={{.*}} global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @s3 ={{.*}} global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @{{.*}}array{{.*}} ={{.*}} global
+// IGNORELIST-NOT: sanitize_address_dyninit
+
 // IGNORELIST: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]+]], ![[GLOB_4:[0-9]+]]}
 // IGNORELIST: ![[GLOB_1]] = !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
 // IGNORELIST: ![[GLOB_2]] = !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
diff --git a/clang/test/CodeGen/sanitizer-special-case-list-globals.c b/clang/test/CodeGen/sanitizer-special-case-list-globals.c
new file mode 100644 (file)
index 0000000..5497aef
--- /dev/null
@@ -0,0 +1,91 @@
+/// Verify that ignorelist sections correctly select sanitizers to apply
+/// ignorelist entries to.
+
+// RUN: %clang_cc1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+// RUN: %clang_cc1 -fsanitize=address -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// Note: HWASan effectively reorders globals (it puts the unsanitized ones
+/// first), which is hard to check for, as 'CHECK-DAG' doesn't play terribly
+/// nicely with 'CHECK-NOT'. This is why the 'always_ignored' and
+/// 'hwasan_ignored' comes first in this file.
+// RUN: %clang_cc1 -fsanitize=hwaddress -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=HWASAN
+
+/// TODO(hctim): Move over to memtag-globals when it's implemented. For now
+/// though, it's fine, the frontend still annotates based on any memtag sanitizer
+/// being used.
+// RUN: %clang_cc1 -fsanitize=memtag-heap -triple=aarch64-linux-android31 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=MEMTAG
+
+/// Check that the '[cfi-vcall|cfi-icall] src:*' rule in the ignorelist doesn't change
+/// anything for ASan.
+// RUN: %clang_cc1 -fsanitize=address -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// Check that -fsanitize=kernel-address picks up the '[address]' groups.
+// RUN: %clang_cc1 -fsanitize=kernel-address -mllvm -hwasan-kernel=1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// KHWASan doesn't instrument global variables.
+// RUN: %clang_cc1 -fsanitize=kernel-hwaddress -mllvm -hwasan-kernel=1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+/// Check that the '[cfi-vcall|cfi-icall] src:*' rule doesnt' emit anything for
+/// GVs.
+// RUN: %clang_cc1 -fsanitize=cfi-vcall,cfi-icall -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+// NONE:     @always_ignored ={{.*}} global
+// NONE-NOT: no_sanitize
+// ASAN:     @always_ignored ={{.*}} global {{.*}}, no_sanitize_address
+// HWASAN:   @always_ignored ={{.*}} global {{.*}}, no_sanitize_hwaddress
+// MEMTAG:   @always_ignored ={{.*}} global {{.*}}, no_sanitize_memtag
+unsigned always_ignored;
+
+// NONE:       @hwasan_ignored ={{.*}} global
+// NONE-NOT:   no_sanitize
+// ASAN:       @hwasan_ignored ={{.*}} global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @hwasan_ignored ={{.*}} global {{.*}}, no_sanitize_hwaddress
+// MEMTAG:     @hwasan_ignored ={{.*}} global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned hwasan_ignored;
+
+// NONE:       @asan_ignored ={{.*}} global
+// NONE-NOT:   asan_ignored
+// ASAN:       @asan_ignored ={{.*}} global {{.*}}, no_sanitize_address
+// HWASAN:     @asan_ignored.hwasan = {{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @asan_ignored ={{.*}} global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned asan_ignored;
+
+// NONE:       @memtag_ignored ={{.*}} global
+// NONE-NOT:   memtag_ignored
+// ASAN:       @memtag_ignored ={{.*}} global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @memtag_ignored.hwasan = {{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @memtag_ignored ={{.*}} global {{.*}}, no_sanitize_memtag
+unsigned memtag_ignored;
+
+// NONE:       @never_ignored ={{.*}} global
+// NONE-NOT:   never_ignored
+// ASAN:       @never_ignored ={{.*}} global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @never_ignored.hwasan ={{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @never_ignored ={{.*}} global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned never_ignored;