Add sanitizer metadata attributes to clang IR gen.
authorMitch Phillips <31459023+hctim@users.noreply.github.com>
Mon, 13 Jun 2022 18:18:55 +0000 (11:18 -0700)
committerMitch Phillips <31459023+hctim@users.noreply.github.com>
Mon, 13 Jun 2022 18:19:15 +0000 (11:19 -0700)
This patch adds generation of sanitizer metadata attributes (which were
added in D126100) to the clang frontend.

We still currently generate the `llvm.asan.globals` that's consumed by
the IR pass, but the plan is to eventually migrate off of that onto
purely debuginfo and these IR attributes.

Reviewed By: vitalybuka, kstoimenov

Differential Revision: https://reviews.llvm.org/D126929

clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/CodeGen/SanitizerMetadata.cpp
clang/lib/CodeGen/SanitizerMetadata.h
clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt [new file with mode: 0644]
clang/test/CodeGen/asan-globals.cpp
clang/test/CodeGen/sanitize-init-order.cpp
clang/test/CodeGen/sanitizer-special-case-list-globals.c [new file with mode: 0644]

index 95d1a4c5f1bcf33f6a7e35759e16743c1991857c..05738c03796be2d0d7b6169a496937cb305f9044 100644 (file)
@@ -2760,21 +2760,14 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn,
   return false;
 }
 
-bool CodeGenModule::isInNoSanitizeList(llvm::GlobalVariable *GV,
+bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind,
+                                       llvm::GlobalVariable *GV,
                                        SourceLocation Loc, QualType Ty,
                                        StringRef Category) const {
-  // For now globals can be ignored only in ASan and KASan.
-  const SanitizerMask EnabledAsanMask =
-      LangOpts.Sanitize.Mask &
-      (SanitizerKind::Address | SanitizerKind::KernelAddress |
-       SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
-       SanitizerKind::MemTag);
-  if (!EnabledAsanMask)
-    return false;
   const auto &NoSanitizeL = getContext().getNoSanitizeList();
-  if (NoSanitizeL.containsGlobal(EnabledAsanMask, GV->getName(), Category))
+  if (NoSanitizeL.containsGlobal(Kind, GV->getName(), Category))
     return true;
-  if (NoSanitizeL.containsLocation(EnabledAsanMask, Loc, Category))
+  if (NoSanitizeL.containsLocation(Kind, Loc, Category))
     return true;
   // Check global type.
   if (!Ty.isNull()) {
@@ -2786,7 +2779,7 @@ bool CodeGenModule::isInNoSanitizeList(llvm::GlobalVariable *GV,
     // Only record types (classes, structs etc.) are ignored.
     if (Ty->isRecordType()) {
       std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy());
-      if (NoSanitizeL.containsType(EnabledAsanMask, TypeStr, Category))
+      if (NoSanitizeL.containsType(Kind, TypeStr, Category))
         return true;
     }
   }
index 5097ef018942a739ce9a873ee6cf9497ba143715..779d94ad62d98686636ef7dbf367b2d2ef92aa0b 100644 (file)
@@ -1314,8 +1314,9 @@ public:
   bool isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn,
                           SourceLocation Loc) const;
 
-  bool isInNoSanitizeList(llvm::GlobalVariable *GV, SourceLocation Loc,
-                          QualType Ty, StringRef Category = StringRef()) const;
+  bool isInNoSanitizeList(SanitizerMask Kind, llvm::GlobalVariable *GV,
+                          SourceLocation Loc, QualType Ty,
+                          StringRef Category = StringRef()) const;
 
   /// Imbue XRay attributes to a function, applying the always/never attribute
   /// lists in the process. Returns true if we did imbue attributes this way,
index 8127e1519261266be300ba29ad6004ba0bb245bf..c3e23c31b37a4abf4c6ca709aa4713ec77c221aa 100644 (file)
@@ -22,19 +22,66 @@ using namespace CodeGen;
 
 SanitizerMetadata::SanitizerMetadata(CodeGenModule &CGM) : CGM(CGM) {}
 
+// TODO(hctim): Can be removed when we migrate off of llvm.asan.globals. This
+// prevents llvm.asan.globals from being emitted for
+// __attribute__((disable_sanitizer_instrumentation)) and uses of
+// -fsanitize-ignorelist when a sanitizer isn't enabled.
 static bool isAsanHwasanOrMemTag(const SanitizerSet &SS) {
   return SS.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
-                     SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
-                     SanitizerKind::MemTag);
+                     SanitizerKind::HWAddress | SanitizerKind::MemTag);
+}
+
+SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
+  if (Mask & (SanitizerKind::Address | SanitizerKind::KernelAddress))
+    Mask |= SanitizerKind::Address | SanitizerKind::KernelAddress;
+  // Note: KHWASan doesn't support globals.
+  return Mask;
 }
 
 void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
                                      SourceLocation Loc, StringRef Name,
-                                     QualType Ty, bool IsDynInit,
-                                     bool IsExcluded) {
-  IsDynInit &= !CGM.isInNoSanitizeList(GV, Loc, Ty, "init");
-  IsExcluded |= CGM.isInNoSanitizeList(GV, Loc, Ty);
+                                     QualType Ty,
+                                     SanitizerMask NoSanitizeAttrMask,
+                                     bool IsDynInit) {
+  SanitizerSet FsanitizeArgument = CGM.getLangOpts().Sanitize;
+  if (!isAsanHwasanOrMemTag(FsanitizeArgument))
+    return;
+
+  FsanitizeArgument.Mask = expandKernelSanitizerMasks(FsanitizeArgument.Mask);
+  NoSanitizeAttrMask = expandKernelSanitizerMasks(NoSanitizeAttrMask);
+  SanitizerSet NoSanitizeAttrSet = {NoSanitizeAttrMask &
+                                    FsanitizeArgument.Mask};
+
+  llvm::GlobalVariable::SanitizerMetadata Meta;
+  if (GV->hasSanitizerMetadata())
+    Meta = GV->getSanitizerMetadata();
+
+  Meta.NoAddress |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::Address);
+  Meta.NoAddress |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::Address, GV, Loc, Ty);
+
+  Meta.NoHWAddress |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::HWAddress);
+  Meta.NoHWAddress |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
+
+  Meta.NoMemtag |= NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+  Meta.NoMemtag |= CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
 
+  if (FsanitizeArgument.has(SanitizerKind::Address)) {
+    // TODO(hctim): Make this conditional when we migrate off llvm.asan.globals.
+    IsDynInit &= !CGM.isInNoSanitizeList(SanitizerKind::Address |
+                                             SanitizerKind::KernelAddress,
+                                         GV, Loc, Ty, "init");
+    Meta.IsDynInit = IsDynInit;
+  }
+
+  bool IsExcluded = Meta.NoAddress || Meta.NoHWAddress || Meta.NoMemtag;
+
+  GV->setSanitizerMetadata(Meta);
+
+  // TODO(hctim): Code below can be removed when we migrate off of
+  // llvm.asan.globals onto the new metadata attributes.
   llvm::Metadata *LocDescr = nullptr;
   llvm::Metadata *GlobalName = nullptr;
   llvm::LLVMContext &VMContext = CGM.getLLVMContext();
@@ -77,23 +124,13 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
 
     return NoSanitizeMask;
   };
-  reportGlobal(GV, D.getLocation(), OS.str(), D.getType(), IsDynInit,
-               SanitizerSet{getNoSanitizeMask(D)}.has(SanitizerKind::Address));
-}
 
-void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
-                                     SourceLocation Loc, StringRef Name,
-                                     QualType Ty, bool IsDynInit) {
-  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
-    return;
-  reportGlobal(GV, Loc, Name, Ty, IsDynInit, false);
+  reportGlobal(GV, D.getLocation(), OS.str(), D.getType(), getNoSanitizeMask(D),
+               IsDynInit);
 }
 
 void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
-  // For now, just make sure the global is not modified by the ASan
-  // instrumentation.
-  if (isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
-    reportGlobal(GV, SourceLocation(), "", QualType(), false, true);
+  reportGlobal(GV, SourceLocation(), "", QualType(), SanitizerKind::All);
 }
 
 void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) {
index a2290fa290f5de623d95442bcb542bb4b6c7d6aa..41e1dca32aad700366f76ce8cb4899e42b433784 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/Sanitizers.h"
 #include "clang/Basic/SourceLocation.h"
 
 namespace llvm {
@@ -40,7 +41,9 @@ public:
   void reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
                     bool IsDynInit = false);
   void reportGlobal(llvm::GlobalVariable *GV, SourceLocation Loc,
-                    StringRef Name, QualType Ty = {}, bool IsDynInit = false);
+                    StringRef Name, QualType Ty = {},
+                    SanitizerMask NoSanitizeAttrMask = {},
+                    bool IsDynInit = false);
   void disableSanitizerForGlobal(llvm::GlobalVariable *GV);
   void disableSanitizerForInstruction(llvm::Instruction *I);
 
diff --git a/clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt b/clang/test/CodeGen/Inputs/sanitizer-special-case-list-globals.txt
new file mode 100644 (file)
index 0000000..81eaa53
--- /dev/null
@@ -0,0 +1,9 @@
+global:always_ignored
+[address]
+global:asan_ignored
+[hwaddress]
+global:hwasan_ignored
+[memtag]
+global:memtag_ignored
+[cfi-vcall|cfi-icall]
+src:*
index 0b17038a5f15a803d82ff04f6e674898fca4ba86..3d604478abe16449a47e33afb21cabc8c904c8ea 100644 (file)
@@ -23,6 +23,9 @@ void func() {
   const char *literal = "Hello, world!";
 }
 
+// ASAN: @dyn_init_global = global {{.*}}, sanitize_address_dyninit
+// KASAN: @dyn_init_global = global {{.*}}, sanitize_address_dyninit
+
 // ASAN: sectioned_global{{.*}} global { i32, [28 x i8] }{{.*}}, align 32
 // KASAN: sectioned_global{{.*}} global i32
 // ASAN: @__special_global{{.*}} global { i32, [28 x i8] }{{.*}}, align 32
index f39d056d78d5a12e69af3a1f018b951d847264ca..92a9033ce885fd6a4b52a89db6865b93bedb1b85 100644 (file)
@@ -36,12 +36,29 @@ const volatile PODWithCtor array[5][5];
 
 // Check that ASan init-order checking ignores structs with trivial default
 // constructor.
+
+// CHECK: @s1 = global
+// CHECK-NOT: sanitize_address_dyninit
+// CHECK: @s2 = global
+// CHECK-NOT: sanitize_address_dyninit
+// CHECK: @s3 = global {{.*}}, sanitize_address_dyninit
+// CHECK: @{{.*}}array{{.*}} = global {{.*}}, sanitize_address_dyninit
+
 // CHECK: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]+]], ![[GLOB_4:[0-9]+]]
 // CHECK: ![[GLOB_1]] = !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
 // CHECK: ![[GLOB_2]] = !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
 // CHECK: ![[GLOB_3]] = !{%struct.PODWithCtorAndDtor* {{.*}}, i1 true, i1 false}
 // CHECK: ![[GLOB_4]] = !{{{.*}}class.NS::PODWithCtor{{.*}}, i1 true, i1 false}
 
+// IGNORELIST: @s1 = global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @s2 = global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @s3 = global
+// IGNORELIST-NOT: sanitize_address_dyninit
+// IGNORELIST: @{{.*}}array{{.*}} = global
+// IGNORELIST-NOT: sanitize_address_dyninit
+
 // IGNORELIST: !llvm.asan.globals = !{![[GLOB_1:[0-9]+]], ![[GLOB_2:[0-9]+]], ![[GLOB_3:[0-9]+]], ![[GLOB_4:[0-9]+]]}
 // IGNORELIST: ![[GLOB_1]] = !{%struct.PODStruct* {{.*}}, i1 false, i1 false}
 // IGNORELIST: ![[GLOB_2]] = !{%struct.PODWithDtor* {{.*}}, i1 false, i1 false}
diff --git a/clang/test/CodeGen/sanitizer-special-case-list-globals.c b/clang/test/CodeGen/sanitizer-special-case-list-globals.c
new file mode 100644 (file)
index 0000000..fe06383
--- /dev/null
@@ -0,0 +1,91 @@
+/// Verify that ignorelist sections correctly select sanitizers to apply
+/// ignorelist entries to.
+
+// RUN: %clang_cc1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+// RUN: %clang_cc1 -fsanitize=address -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// Note: HWASan effectively reorders globals (it puts the unsanitized ones
+/// first), which is hard to check for, as 'CHECK-DAG' doesn't play terribly
+/// nicely with 'CHECK-NOT'. This is why the 'always_ignored' and
+/// 'hwasan_ignored' comes first in this file.
+// RUN: %clang_cc1 -fsanitize=hwaddress -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=HWASAN
+
+/// TODO(hctim): Move over to memtag-globals when it's implemented. For now
+/// though, it's fine, the frontend still annotates based on any memtag sanitizer
+/// being used.
+// RUN: %clang_cc1 -fsanitize=memtag-heap -triple=aarch64-linux-android31 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=MEMTAG
+
+/// Check that the '[cfi-vcall|cfi-icall] src:*' rule in the ignorelist doesn't change
+/// anything for ASan.
+// RUN: %clang_cc1 -fsanitize=address -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// Check that -fsanitize=kernel-address picks up the '[address]' groups.
+// RUN: %clang_cc1 -fsanitize=kernel-address -mllvm -hwasan-kernel=1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=ASAN
+
+/// KHWASan doesn't instrument global variables.
+// RUN: %clang_cc1 -fsanitize=kernel-hwaddress -mllvm -hwasan-kernel=1 -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+/// Check that the '[cfi-vcall|cfi-icall] src:*' rule doesnt' emit anything for
+/// GVs.
+// RUN: %clang_cc1 -fsanitize=cfi-vcall,cfi-icall -emit-llvm %s -o -\
+// RUN: -fsanitize-ignorelist=%S/Inputs/sanitizer-special-case-list-globals.txt \
+// RUN: | FileCheck %s --check-prefix=NONE
+
+// NONE:     @always_ignored = global
+// NONE-NOT: no_sanitize
+// ASAN:     @always_ignored = global {{.*}}, no_sanitize_address
+// HWASAN:   @always_ignored = global {{.*}}, no_sanitize_hwaddress
+// MEMTAG:   @always_ignored = global {{.*}}, no_sanitize_memtag
+unsigned always_ignored;
+
+// NONE:       @hwasan_ignored = global
+// NONE-NOT:   no_sanitize
+// ASAN:       @hwasan_ignored = global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @hwasan_ignored = global {{.*}}, no_sanitize_hwaddress
+// MEMTAG:     @hwasan_ignored = global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned hwasan_ignored;
+
+// NONE:       @asan_ignored = global
+// NONE-NOT:   asan_ignored
+// ASAN:       @asan_ignored = global {{.*}}, no_sanitize_address
+// HWASAN:     @asan_ignored.hwasan = {{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @asan_ignored = global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned asan_ignored;
+
+// NONE:       @memtag_ignored = global
+// NONE-NOT:   memtag_ignored
+// ASAN:       @memtag_ignored = global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @memtag_ignored.hwasan = {{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @memtag_ignored = global {{.*}}, no_sanitize_memtag
+unsigned memtag_ignored;
+
+// NONE:       @never_ignored = global
+// NONE-NOT:   never_ignored
+// ASAN:       @never_ignored = global
+// ASAN-NOT:   no_sanitize_address
+// HWASAN:     @never_ignored.hwasan = {{.*}} global
+// HWASAN-NOT: no_sanitize_hwaddress
+// MEMTAG:     @never_ignored = global
+// MEMTAG-NOT: no_sanitize_memtag
+unsigned never_ignored;