[MTE] Add AArch64GlobalsTagging Pass
authorMitch Phillips <31459023+hctim@users.noreply.github.com>
Tue, 31 Jan 2023 17:24:13 +0000 (09:24 -0800)
committerMitch Phillips <31459023+hctim@users.noreply.github.com>
Tue, 31 Jan 2023 17:24:18 +0000 (09:24 -0800)
Adds an IR pass for -fsanitize=memtag-globals. This pass goes over the
tag-capable global variables, and replaces them with a tagged global
variable of the same contents. This new global variable will have its
size and alignment adjusted if necessary so that they're both a multiple
of the tag granule size (16 bytes).

Global merge must also be suppressed for tagged globals, as each global
variable must have a unique tag. This can possibly be relaxed in future;
globals that are identical in size, alignment, and content can possibly
be merged. The major problem comes from tail- or head-merging, which if
left unchecked, could have partially-overlapping global variables with
different memory tags, leading to crashes at runtime.

Reviewed By: fmayer, eugenis

Differential Revision: https://reviews.llvm.org/D133392

clang/test/CodeGen/memtag-globals-asm.cpp [new file with mode: 0644]
llvm/lib/CodeGen/GlobalMerge.cpp
llvm/lib/Target/AArch64/AArch64.h
llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp [new file with mode: 0644]
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/test/CodeGen/AArch64/O0-pipeline.ll
llvm/test/CodeGen/AArch64/O3-pipeline.ll

diff --git a/clang/test/CodeGen/memtag-globals-asm.cpp b/clang/test/CodeGen/memtag-globals-asm.cpp
new file mode 100644 (file)
index 0000000..0e9ab31
--- /dev/null
@@ -0,0 +1,259 @@
+// RUN: %clang_cc1 -S -x c++ -std=c++11 -triple aarch64-linux-android31 \
+// RUN:   -fsanitize=memtag-globals -o %t.out %s
+// RUN: FileCheck %s --input-file=%t.out
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-A
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-B
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-C
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-D
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-E
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-F
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-G
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-H
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-I
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-J
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-K
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-L
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-M
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-N
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-O
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-P
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-Q
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-R
+
+// RUN: %clang_cc1 -O3 -S -x c++ -std=c++11 -triple aarch64-linux-android31 \
+// RUN:   -fsanitize=memtag-globals -o %t.out %s
+// RUN: FileCheck %s --input-file=%t.out
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-A
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-B
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-C
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-D
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-E
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-F
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-G
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-H
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-I
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-J
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-K
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-L
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-M
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-N
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-O
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-P
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-Q
+// RUN: FileCheck %s --input-file=%t.out --check-prefix=CHECK-R
+
+/// Ensure that emulated TLS also doesn't get sanitized.
+// RUN: %clang_cc1 -S -x c++ -std=c++11 -triple aarch64-linux-android31 \
+// RUN:   -fsanitize=memtag-globals -o - %s | FileCheck %s
+
+// CHECK-A: .memtag global_int
+// CHECK-A: .globl global_int
+// CHECK-A: .p2align 4, 0x0
+// CHECK-A: .size global_int, 16
+int global_int;
+// CHECK-B: .memtag _ZL9local_int
+// CHECK-B: .local _ZL9local_int
+// CHECK-B: .comm _ZL9local_int,16,16
+static int local_int;
+
+// CHECK-C: .memtag _ZL12local_buffer
+// CHECK-C: .local _ZL12local_buffer
+// CHECK-C: .comm _ZL12local_buffer,16,16
+static char local_buffer[16];
+// CHECK-D: .memtag _ZL22local_buffer_local_end
+// CHECK-D: .p2align 4, 0x0
+// CHECK-D: _ZL22local_buffer_local_end:
+// CHECK-D: .xword _ZL12local_buffer+16
+// CHECK-D: .size _ZL22local_buffer_local_end, 16
+static char* local_buffer_local_end = &local_buffer[16];
+// CHECK-E: .memtag local_buffer_global_end
+// CHECK-E: .globl local_buffer_global_end
+// CHECK-E: .p2align 4, 0x0
+// CHECK-E: local_buffer_global_end:
+// CHECK-E: .xword _ZL12local_buffer+16
+// CHECK-E: .size local_buffer_global_end, 16
+char* local_buffer_global_end = &local_buffer[16];
+
+// CHECK-F: .memtag global_buffer
+// CHECK-F: .globl global_buffer
+// CHECK-F: .p2align 4, 0x0
+// CHECK-F: .size global_buffer, 16
+char global_buffer[16];
+// CHECK-G: .memtag _ZL23global_buffer_local_end
+// CHECK-G: .p2align 4, 0x0
+// CHECK-G: _ZL23global_buffer_local_end:
+// CHECK-G: .xword global_buffer+16
+// CHECK-G: .size _ZL23global_buffer_local_end, 16
+static char* global_buffer_local_end = &global_buffer[16];
+// CHECK-H: .memtag global_buffer_global_end
+// CHECK-H: .p2align 4, 0x0
+// CHECK-H: global_buffer_global_end:
+// CHECK-H: .xword global_buffer+16
+// CHECK-H: .size global_buffer_global_end, 16
+char* global_buffer_global_end = &global_buffer[16];
+
+/// Polymorphic class (virtual destructor and virtual method) that declares one
+/// mutable and one const static data member. The static members and objects
+/// of this type are defined further down in this file.
+class MyClass {
+ public:
+  virtual ~MyClass() {}
+  static int my_class_int;
+  static const int my_class_const_int;
+  virtual int virtual_func() { return 1; }
+};
+// CHECK-I: .memtag _ZN7MyClass12my_class_intE
+// CHECK-I: .globl _ZN7MyClass12my_class_intE
+// CHECK-I: .p2align 4, 0x0
+// CHECK-I: .size _ZN7MyClass12my_class_intE, 16
+int MyClass::my_class_int;
+// CHECK-NOT: .memtag _ZN7MyClass18my_class_const_intE
+const int MyClass::my_class_const_int = 1;
+
+// CHECK-J: .memtag global_my_class
+// CHECK-J: .globl global_my_class
+// CHECK-J: .p2align 4, 0x0
+// CHECK-J: .size global_my_class, 16
+MyClass global_my_class;
+// CHECK-K: .memtag _ZL14local_my_class
+// CHECK-K: .p2align 4, 0x0
+// CHECK-K: .size _ZL14local_my_class, 16
+static MyClass local_my_class;
+
+// CHECK-NOT: .memtag _ZL18local_const_string
+static const char local_const_string[] = "this is a local string";
+// CHECK-L: .memtag _ZL12local_string
+// CHECK-L: .p2align 4, 0x0
+// CHECK-L: .size _ZL12local_string, 32
+static char local_string[] = "this is a local string";
+
+// CHECK-M: .memtag global_atomic_int
+// CHECK-M: .globl global_atomic_int
+// CHECK-M: .p2align 4, 0x0
+// CHECK-M: .size global_atomic_int, 16
+_Atomic(int) global_atomic_int;
+// CHECK-N: .memtag _ZL16local_atomic_int
+// CHECK-N: .local _ZL16local_atomic_int
+// CHECK-N: .comm _ZL16local_atomic_int,16,16
+static _Atomic(int) local_atomic_int;
+
+/// Union overlaying an int and a char. f() reads global_union through `i` and
+/// local_union through `c`.
+union MyUnion {
+  int i;
+  char c;
+};
+
+// CHECK-O: .memtag global_union
+// CHECK-O: .globl global_union
+// CHECK-O: .p2align 4, 0x0
+// CHECK-O: .size global_union, 16
+MyUnion global_union;
+// CHECK-P: .memtag _ZL11local_union
+// CHECK-P: .local _ZL11local_union
+// CHECK-P: .comm _ZL11local_union,16,16
+static MyUnion local_union;
+
+// CHECK-NOT: .memtag {{.*}}global_tls
+thread_local int global_tls;
+// CHECK-NOT: .memtag {{.*}}local_tls
+static thread_local int local_tls;
+
+/// Returns the address of one of the variables defined above, selected by `c`
+/// (nullptr for any value of `c` that has no case). Letting these addresses
+/// escape prevents the compiler from realising that the non-const local
+/// variables are never modified, and from constant-inlining them into f().
+const void* export_pointers(int c) {
+  switch (c) {
+    case 0:  return &local_int;
+    case 1:  return &local_buffer;
+    case 2:  return &local_buffer_local_end;
+    case 3:  return &global_buffer_local_end;
+    case 4:  return &MyClass::my_class_int;
+    case 6:  return &local_my_class;
+    case 8:  return &local_string;
+    case 9:  return &local_atomic_int;
+    case 10: return &local_union;
+    case 11: return &local_tls;
+  }
+  return nullptr;
+}
+
+/// Ensure that all tagged globals are loaded/referenced via the GOT.
+// CHECK-NOT:      .memtag _Z1fi
+// CHECK-Q:        _Z1fi:
+int f(int x) {
+  // CHECK-R: .memtag _ZZ1fiE12function_int
+  // CHECK-R: .local _ZZ1fiE12function_int
+  // CHECK-R: .comm _ZZ1fiE12function_int,16,16
+  static int function_int = 0;
+  /// Prevent non-const `function_int` from being promoted to a constant and
+  /// inlined.
+  function_int += x;
+
+  return
+  // CHECK-Q-DAG: adrp [[REG_A:x[0-9]+]], :got:global_int
+  // CHECK-Q-DAG: ldr  [[REG_A2:x[0-9]+]], [[[REG_A]], :got_lo12:global_int]
+  // CHECK-Q-DAG: ldr  {{.*}}, [[[REG_A2]]]
+      global_int +
+  // CHECK-Q-DAG: adrp [[REG_B:x[0-9]+]], :got:_ZL9local_int
+  // CHECK-Q-DAG: ldr  [[REG_B2:x[0-9]+]], [[[REG_B]], :got_lo12:_ZL9local_int]
+  // CHECK-Q-DAG: ldr  {{.*}}, [[[REG_B2]]]
+      local_int +
+  // CHECK-Q-DAG: adrp  [[REG_C:x[0-9]+]], :got:_ZL12local_buffer
+  // CHECK-Q-DAG: ldr   [[REG_C2:x[0-9]+]], [[[REG_C]], :got_lo12:_ZL12local_buffer]
+  // CHECK-Q-DAG: ldrsb {{.*}}, [[[REG_C2]]]
+      local_buffer[0] +
+  // CHECK-Q-DAG: adrp   [[REG_D:x[0-9]+]], :got:_ZL22local_buffer_local_end
+  // CHECK-Q-DAG: ldr    [[REG_D2:x[0-9]+]], [[[REG_D]], :got_lo12:_ZL22local_buffer_local_end]
+  // CHECK-Q-DAG: ldr    [[REG_D3:x[0-9]+]], [[[REG_D2]]]
+  // CHECK-Q-DAG: ldursb {{.*}}, [[[REG_D3]], #-16]
+      local_buffer_local_end[-16] +
+  // CHECK-Q-DAG: adrp   [[REG_E:x[0-9]+]], :got:local_buffer_global_end
+  // CHECK-Q-DAG: ldr    [[REG_E2:x[0-9]+]], [[[REG_E]], :got_lo12:local_buffer_global_end]
+  // CHECK-Q-DAG: ldr    [[REG_E3:x[0-9]+]], [[[REG_E2]]]
+  // CHECK-Q-DAG: ldursb {{.*}}, [[[REG_E3]], #-16]
+      local_buffer_global_end[-16] +
+  // CHECK-Q-DAG: adrp  [[REG_F:x[0-9]+]], :got:global_buffer{{$}}
+  // CHECK-Q-DAG: ldr   [[REG_F2:x[0-9]+]], [[[REG_F]], :got_lo12:global_buffer]
+  // CHECK-Q-DAG: ldrsb {{.*}}, [[[REG_F2]]]
+      global_buffer[0] +
+  // CHECK-Q-DAG: adrp   [[REG_G:x[0-9]+]], :got:_ZL23global_buffer_local_end
+  // CHECK-Q-DAG: ldr    [[REG_G2:x[0-9]+]], [[[REG_G]], :got_lo12:_ZL23global_buffer_local_end]
+  // CHECK-Q-DAG: ldr    [[REG_G3:x[0-9]+]], [[[REG_G2]]]
+  // CHECK-Q-DAG: ldursb {{.*}}, [[[REG_G3]], #-16]
+      global_buffer_local_end[-16] +
+  // CHECK-Q-DAG: adrp   [[REG_H:x[0-9]+]], :got:global_buffer_global_end
+  // CHECK-Q-DAG: ldr    [[REG_H2:x[0-9]+]], [[[REG_H]], :got_lo12:global_buffer_global_end]
+  // CHECK-Q-DAG: ldr    [[REG_H3:x[0-9]+]], [[[REG_H2]]]
+  // CHECK-Q-DAG: ldursb {{.*}}, [[[REG_H3]], #-16]
+      global_buffer_global_end[-16] +
+  // CHECK-Q-DAG: adrp [[REG_I:x[0-9]+]], :got:_ZN7MyClass12my_class_intE
+  // CHECK-Q-DAG: ldr  [[REG_I2:x[0-9]+]], [[[REG_I]], :got_lo12:_ZN7MyClass12my_class_intE]
+  // CHECK-Q-DAG: ldr  {{.*}}, [[[REG_I2]]]
+      MyClass::my_class_int +
+  /// Constant values - ignore.
+      MyClass::my_class_const_int +
+      global_my_class.virtual_func() +
+      local_my_class.virtual_func() +
+      local_const_string[0] +
+  // CHECK-Q-DAG: adrp  [[REG_J:x[0-9]+]], :got:_ZL12local_string
+  // CHECK-Q-DAG: ldr   [[REG_J2:x[0-9]+]], [[[REG_J]], :got_lo12:_ZL12local_string]
+  // CHECK-Q-DAG: ldrsb {{.*}}, [[[REG_J2]]]
+      local_string[0] +
+  // CHECK-Q-DAG: adrp  [[REG_K:x[0-9]+]], :got:_ZL16local_atomic_int
+  // CHECK-Q-DAG: ldr   [[REG_K2:x[0-9]+]], [[[REG_K]], :got_lo12:_ZL16local_atomic_int]
+  // CHECK-Q-DAG: ldar {{.*}}, [[[REG_K2]]]
+      local_atomic_int +
+  // CHECK-Q-DAG: adrp [[REG_L:x[0-9]+]], :got:global_atomic_int
+  // CHECK-Q-DAG: ldr  [[REG_L2:x[0-9]+]], [[[REG_L]], :got_lo12:global_atomic_int]
+  // CHECK-Q-DAG: ldar {{.*}}, [[[REG_L2]]]
+      global_atomic_int +
+  // CHECK-Q-DAG: adrp [[REG_M:x[0-9]+]], :got:global_union
+  // CHECK-Q-DAG: ldr  [[REG_M2:x[0-9]+]], [[[REG_M]], :got_lo12:global_union]
+  // CHECK-Q-DAG: ldr  {{.*}}, [[[REG_M2]]]
+      global_union.i +
+  // CHECK-Q-DAG: adrp  [[REG_N:x[0-9]+]], :got:_ZL11local_union
+  // CHECK-Q-DAG: ldr   [[REG_N2:x[0-9]+]], [[[REG_N]], :got_lo12:_ZL11local_union]
+  // CHECK-Q-DAG: ldrsb {{.*}}, [[[REG_N2]]]
+      local_union.c +
+  /// Thread-local variables - ignore.
+      global_tls +
+      local_tls +
+  // CHECK-Q-DAG: adrp  [[REG_O:x[0-9]+]], :got:_ZZ1fiE12function_int
+  // CHECK-Q-DAG: ldr   [[REG_O2:x[0-9]+]], [[[REG_O]], :got_lo12:_ZZ1fiE12function_int]
+  // CHECK-Q-DAG: ldr   {{.*}}, [[[REG_O2]]]
+      function_int;
+}
index 2ccf2de..3e9a12b 100644 (file)
@@ -652,6 +652,14 @@ bool GlobalMerge::doInitialization(Module &M) {
     if (isMustKeepGlobalVariable(&GV))
       continue;
 
+    // Don't merge tagged globals, as each global should have its own unique
+    // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
+    // with compatible alignment and the same contents may be merged as long as
+    // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
+    // size_b / 16`).
+    if (GV.isTagged())
+      continue;
+
     Type *Ty = GV.getValueType();
     if (DL.getTypeAllocSize(Ty) < MaxOffset) {
       if (TM &&
index 6ef0c80..fad6201 100644 (file)
@@ -70,6 +70,7 @@ FunctionPass *createAArch64PostLegalizerLowering();
 FunctionPass *createAArch64PostSelectOptimize();
 FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
 FunctionPass *createAArch64StackTaggingPreRAPass();
+ModulePass *createAArch64GlobalsTaggingPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -84,6 +85,7 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &);
 void initializeAArch64DAGToDAGISelPass(PassRegistry &);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
 void initializeAArch64ExpandPseudoPass(PassRegistry &);
+void initializeAArch64GlobalsTaggingPass(PassRegistry &);
 void initializeAArch64KCFIPass(PassRegistry &);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
new file mode 100644 (file)
index 0000000..2ed6687
--- /dev/null
@@ -0,0 +1,142 @@
+//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <set>
+
+using namespace llvm;
+
+// Tag granule size in bytes. Tagged globals have their size rounded up to a
+// multiple of this value and their alignment raised to at least this value.
+static const Align kTagGranuleSize = Align(16);
+
+// Returns true if G should be instrumented by this pass.
+//
+// A global that is not marked as tagged is rejected and left untouched. A
+// tagged global that this pass does not instrument (its name starts with
+// "llvm.", it is thread-local, or it is constant) is rejected as well, and
+// additionally has the Memtag bit of its sanitizer metadata cleared.
+static bool shouldTagGlobal(GlobalVariable &G) {
+  if (!G.isTagged())
+    return false;
+
+  assert(G.hasSanitizerMetadata() &&
+         "Missing sanitizer metadata, but symbol is apparently tagged.");
+
+  // For now, constant data is not instrumented, as it'll be in .rodata anyway.
+  // It may be worth instrumenting it in future to stop it from being used as
+  // gadgets.
+  const bool CanInstrument = !G.getName().startswith("llvm.") &&
+                             !G.isThreadLocal() && !G.isConstant();
+  if (CanInstrument)
+    return true;
+
+  // Not instrumented: remove the memtag marking from the global itself.
+  GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
+  Meta.Memtag = false;
+  G.setSanitizerMetadata(Meta);
+  return false;
+}
+
+// Technically, due to ELF symbol interposition semantics, we can't change the
+// alignment or size of symbols. If we increase the alignment or size of a
+// symbol, the compiler may make optimisations based on this new alignment or
+// size. If the symbol is interposed, this optimisation could lead to
+// alignment-related or OOB read/write crashes.
+//
+// This is handled in the linker. When the linker sees multiple declarations of
+// a global variable, and some are tagged, and some are untagged, it resolves it
+// to be an untagged definition - but preserves the tag-granule-rounded size and
+// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
+// For cross-DSO, it's been a reasonable contract that if you're interposing a
+// sanitizer-instrumented global, then the interposer also needs to be
+// sanitizer-instrumented.
+//
+// FIXME: In theory, this can be fixed by splitting the size/alignment of
+// globals into two uses: an "output alignment" that's emitted to the ELF file,
+// and an "optimisation alignment" that's used for optimisation. Thus, we could
+// adjust the output alignment only, and still optimise based on the pessimistic
+// pre-tagging size/alignment.
+static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
+  Constant *Initializer = G->getInitializer();
+  uint64_t SizeInBytes =
+      M.getDataLayout().getTypeAllocSize(Initializer->getType());
+
+  uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
+  if (SizeInBytes != NewSize) {
+    // Pad the initializer out to the next multiple of 16 bytes.
+    llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
+    Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
+    Initializer = ConstantStruct::getAnon({Initializer, Padding});
+    auto *NewGV = new GlobalVariable(
+        M, Initializer->getType(), G->isConstant(), G->getLinkage(),
+        Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
+    NewGV->copyAttributesFrom(G);
+    NewGV->setComdat(G->getComdat());
+    NewGV->copyMetadata(G, 0);
+
+    NewGV->takeName(G);
+    G->replaceAllUsesWith(NewGV);
+    G->eraseFromParent();
+    G = NewGV;
+  }
+
+  G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
+
+  // Ensure that tagged globals don't get merged by ICF - as they should have
+  // different tags at runtime.
+  G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+}
+
+namespace {
+// Module pass that rewrites the definitions of tagged global variables (see
+// runOnModule). The pass keeps no state between runs: the list of globals to
+// rewrite is a local of runOnModule, so no data member is needed here.
+class AArch64GlobalsTagging : public ModulePass {
+public:
+  static char ID;
+
+  // Registers the pass with the global PassRegistry on construction.
+  explicit AArch64GlobalsTagging() : ModulePass(ID) {
+    initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+
+  StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
+};
+} // anonymous namespace
+
+char AArch64GlobalsTagging::ID = 0;
+
+// Collects every global variable definition in M accepted by shouldTagGlobal()
+// and then rewrites each one with tagGlobalDefinition(). Always returns true,
+// i.e. the module is always reported as modified.
+bool AArch64GlobalsTagging::runOnModule(Module &M) {
+  // Gather first, rewrite afterwards: tagGlobalDefinition() may insert and
+  // erase globals, which would invalidate the iteration over M.globals().
+  std::vector<GlobalVariable *> Worklist;
+  for (GlobalVariable &GV : M.globals()) {
+    if (!GV.isDeclaration() && shouldTagGlobal(GV))
+      Worklist.push_back(&GV);
+  }
+
+  for (GlobalVariable *GV : Worklist)
+    tagGlobalDefinition(M, GV);
+
+  return true;
+}
+
+// Pass registration boilerplate for "aarch64-globals-tagging". Nothing is
+// listed between the BEGIN and END macros, i.e. this pass declares no
+// dependencies on other passes.
+INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
+                      "AArch64 Globals Tagging Pass", false, false)
+INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
+                    "AArch64 Globals Tagging Pass", false, false)
+
+// Factory for the pass; returns a newly heap-allocated instance.
+// NOTE(review): presumably the caller (e.g. the pass manager via addPass)
+// takes ownership of the returned object - confirm.
+ModulePass *llvm::createAArch64GlobalsTaggingPass() {
+  return new AArch64GlobalsTagging();
+}
index 245ed81..5f5df2e 100644 (file)
@@ -363,6 +363,13 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
     return AArch64II::MO_GOT;
 
+  // All globals dynamically protected by MTE must have their address tags
+  // synthesized. This is done by having the loader stash the tag in the GOT
+  // entry. Force all tagged globals (even ones with internal linkage) through
+  // the GOT.
+  if (GV->isTagged())
+    return AArch64II::MO_GOT;
+
   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
     if (GV->hasDLLImportStorageClass()) {
       if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
index eafd311..1944a89 100644 (file)
@@ -238,6 +238,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64StackTaggingPreRAPass(*PR);
   initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
   initializeAArch64DAGToDAGISelPass(*PR);
+  initializeAArch64GlobalsTaggingPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -587,6 +588,7 @@ void AArch64PassConfig::addIRPasses() {
   if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt)
     addPass(createSelectOptimizePass());
 
+  addPass(createAArch64GlobalsTaggingPass());
   addPass(createAArch64StackTaggingPass(
       /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
 
index f42ab1f..fd3f2d5 100644 (file)
@@ -56,6 +56,7 @@ add_llvm_target(AArch64CodeGen
   AArch64FastISel.cpp
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
+  AArch64GlobalsTagging.cpp
   AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
index e6a221c..0cd40aa 100644 (file)
@@ -26,6 +26,8 @@
 ; CHECK-NEXT:       Expand vector predication intrinsics
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
+; CHECK-NEXT:     AArch64 Globals Tagging
+; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       AArch64 Stack Tagging
 ; CHECK-NEXT:       SME ABI Pass
 ; CHECK-NEXT:       Exception handling preparation
index 3d8fcee..f526dc6 100644 (file)
@@ -70,6 +70,7 @@
 ; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Optimize selects
+; CHECK-NEXT:     AArch64 Globals Tagging
 ; CHECK-NEXT:     Stack Safety Analysis
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction