From 3770b4aa3c5a06efbf43b5c22b2b641794a41dca Mon Sep 17 00:00:00 2001
From: David Green
Date: Mon, 23 Jan 2023 11:22:11 +0000
Subject: [PATCH] [ARM] Don't emit Arm speculation hardening thunks under
 Thumb and vice-versa

Given a patch like D129506, using instructions that are not valid for the
current target feature set becomes an error, so emitting Arm instructions
in a Thumb target (or vice versa) would now fail. When running in Thumb
mode only Thumb thunks are needed, and in Arm mode only Arm thunks are
needed, so this patch limits the emitted thunks to just the ones valid for
the current architecture.

Differential Revision: https://reviews.llvm.org/D129693
---
 llvm/include/llvm/CodeGen/IndirectThunks.h         | 35 ++++++++++++----------
 llvm/lib/Target/AArch64/AArch64SLSHardening.cpp    | 10 +++++--
 llvm/lib/Target/ARM/ARMSLSHardening.cpp            | 30 +++++++++++++++----
 llvm/lib/Target/X86/X86IndirectThunks.cpp          | 17 +++++++----
 .../ARM/speculation-hardening-sls-boththunks.ll    | 17 +++++++++++
 llvm/test/CodeGen/ARM/speculation-hardening-sls.ll |  3 +-
 6 files changed, 82 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll

diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h
index a2cdd0a..6da60fb 100644
--- a/llvm/include/llvm/CodeGen/IndirectThunks.h
+++ b/llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -21,27 +21,32 @@
 
 namespace llvm {
 
-template <typename Derived> class ThunkInserter {
+template <typename Derived, typename InsertedThunksTy = bool>
+class ThunkInserter {
   Derived &getDerived() { return *static_cast<Derived *>(this); }
 
 protected:
-  bool InsertedThunks;
+  // A variable used to track whether (and possibly which) thunks have been
+  // inserted so far. InsertedThunksTy is usually a bool, but can be other types
+  // to represent more than one type of thunk. Requires an |= operator to
+  // accumulate results.
+  InsertedThunksTy InsertedThunks;
   void doInitialization(Module &M) {}
   void createThunkFunction(MachineModuleInfo &MMI, StringRef Name,
                            bool Comdat = true);
 
 public:
   void init(Module &M) {
-    InsertedThunks = false;
+    InsertedThunks = InsertedThunksTy{};
     getDerived().doInitialization(M);
   }
   // return `true` if `MMI` or `MF` was modified
   bool run(MachineModuleInfo &MMI, MachineFunction &MF);
 };
 
-template <typename Derived>
-void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
-                                                 StringRef Name, bool Comdat) {
+template <typename Derived, typename InsertedThunksTy>
+void ThunkInserter<Derived, InsertedThunksTy>::createThunkFunction(
+    MachineModuleInfo &MMI, StringRef Name, bool Comdat) {
   assert(Name.startswith(getDerived().getThunkPrefix()) &&
          "Created a thunk with an unexpected prefix!");
 
@@ -82,26 +87,24 @@ void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
 }
 
-template <typename Derived>
-bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
+template <typename Derived, typename InsertedThunksTy>
+bool ThunkInserter<Derived, InsertedThunksTy>::run(MachineModuleInfo &MMI,
                                                    MachineFunction &MF) {
   // If MF is not a thunk, check to see if we need to insert a thunk.
   if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
-    // If we've already inserted a thunk, nothing else to do.
-    if (InsertedThunks)
-      return false;
-
     // Only add a thunk if one of the functions has the corresponding feature
-    // enabled in its subtarget, and doesn't enable external thunks.
+    // enabled in its subtarget, and doesn't enable external thunks. The target
+    // can use InsertedThunks to detect whether relevant thunks have already
+    // been inserted.
     // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
     // nothing will end up calling it.
     // FIXME: It's a little silly to look at every function just to enumerate
     // the subtargets, but eventually we'll want to look at them for indirect
     // calls, so maybe this is OK.
-    if (!getDerived().mayUseThunk(MF))
+    if (!getDerived().mayUseThunk(MF, InsertedThunks))
       return false;
 
-    getDerived().insertThunks(MMI);
-    InsertedThunks = true;
+    InsertedThunks |= getDerived().insertThunks(MMI, MF);
     return true;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
index 364ce68..cd65c16 100644
--- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -185,13 +185,15 @@ static const struct ThunkNameAndReg {
 namespace {
 struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     ComdatThunks &= !MF.getSubtarget<AArch64Subtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any BLRs in the function
     // to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);
 
 private:
@@ -199,12 +201,14 @@ private:
 };
 } // namespace
 
-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+bool SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                       MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in BLR instructions in this
   // function. But would that be a worthwhile optimization?
   for (auto T : SLSBLRThunks)
     createThunkFunction(MMI, T.Name, ComdatThunks);
+  return true;
 }
 
 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
index fa80b75..3dd9428 100644
--- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -161,16 +161,32 @@ static const struct ThunkNameRegMode {
     {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true},
 };
 
+// An enum for tracking whether Arm and Thumb thunks have been inserted into
+// the current module so far.
+enum ArmInsertedThunks { ArmThunk = 1, ThumbThunk = 2 };
+
+inline ArmInsertedThunks &operator|=(ArmInsertedThunks &X,
+                                     ArmInsertedThunks Y) {
+  return X = static_cast<ArmInsertedThunks>(X | Y);
+}
+
 namespace {
-struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
+struct SLSBLRThunkInserter
+    : ThunkInserter<SLSBLRThunkInserter, ArmInsertedThunks> {
   const char *getThunkPrefix() { return SLSBLRNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF,
+                   ArmInsertedThunks InsertedThunks) {
+    if ((InsertedThunks & ArmThunk &&
+         !MF.getSubtarget<ARMSubtarget>().isThumb()) ||
+        (InsertedThunks & ThumbThunk &&
+         MF.getSubtarget<ARMSubtarget>().isThumb()))
+      return false;
     ComdatThunks &= !MF.getSubtarget<ARMSubtarget>().hardenSlsNoComdat();
     // FIXME: This could also check if there are any indirect calls in the
     // function to more accurately reflect if a thunk will be needed.
     return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  ArmInsertedThunks insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);
 
 private:
@@ -178,12 +194,16 @@ private:
 };
 } // namespace
 
-void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ArmInsertedThunks SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                                    MachineFunction &MF) {
   // FIXME: It probably would be possible to filter which thunks to produce
   // based on which registers are actually used in indirect calls in this
   // function. But would that be a worthwhile optimization?
+  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
   for (auto T : SLSBLRThunks)
-    createThunkFunction(MMI, T.Name, ComdatThunks);
+    if (ST->isThumb() == T.isThumb)
+      createThunkFunction(MMI, T.Name, ComdatThunks);
+  return ST->isThumb() ? ThumbThunk : ArmThunk;
 }
 
 void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
index 860af47..9db6679 100644
--- a/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -61,23 +61,28 @@ static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
 namespace {
 struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
   const char *getThunkPrefix() { return RetpolineNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     const auto &STI = MF.getSubtarget<X86Subtarget>();
     return (STI.useRetpolineIndirectCalls() ||
             STI.useRetpolineIndirectBranches()) &&
           !STI.useRetpolineExternalThunk();
   }
-  void insertThunks(MachineModuleInfo &MMI);
+  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
   void populateThunk(MachineFunction &MF);
 };
 
 struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
   const char *getThunkPrefix() { return LVIThunkNamePrefix; }
-  bool mayUseThunk(const MachineFunction &MF) {
+  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
+    if (InsertedThunks)
+      return false;
     return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
   }
-  void insertThunks(MachineModuleInfo &MMI) {
+  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
     createThunkFunction(MMI, R11LVIThunkName);
+    return true;
   }
   void populateThunk(MachineFunction &MF) {
     assert (MF.size() == 1);
@@ -132,13 +137,15 @@ private:
 
 } // end anonymous namespace
 
-void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+bool RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
+                                          MachineFunction &MF) {
   if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
     createThunkFunction(MMI, R11RetpolineName);
   else
     for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
                            EDIRetpolineName})
       createThunkFunction(MMI, Name);
+  return true;
 }
 
 void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll b/llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
new file mode 100644
index 0000000..dd25869
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/speculation-hardening-sls-boththunks.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mattr=harden-sls-retbr -mattr=harden-sls-blr -verify-machineinstrs -mtriple=armv8-linux-gnueabi < %s | FileCheck %s
+
+; Given both Arm and Thumb functions in the same compilation unit, we should
+; get both Arm and Thumb thunks.
+
+define i32 @test1(i32 %a, i32 %b) {
+  ret i32 %a
+}
+
+define i32 @test2(i32 %a, i32 %b) "target-features"="+thumb-mode" {
+  ret i32 %a
+}
+
+; CHECK: test1
+; CHECK: test2
+; CHECK: __llvm_slsblr_thunk_arm_sp
+; CHECK: __llvm_slsblr_thunk_thumb_sp
\ No newline at end of file
diff --git a/llvm/test/CodeGen/ARM/speculation-hardening-sls.ll b/llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
index 282ffb2..f25d73a 100644
--- a/llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
+++ b/llvm/test/CodeGen/ARM/speculation-hardening-sls.ll
@@ -256,4 +256,5 @@ entry:
 
 ; SB-NEXT: isb
 ; HARDEN-NEXT: .Lfunc_end
-
+; THUMB-NOT: __llvm_slsblr_thunk_arm
+; ARM-NOT: __llvm_slsblr_thunk_thumb
-- 
2.7.4
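
The thunk-tracking scheme in the patch is spread across the generic ThunkInserter and three backends, so the state machine can be hard to follow from the diff alone. The sketch below is not part of the patch: it is a minimal, self-contained C++ model of the same accumulation pattern, with the LLVM classes (MachineFunction, MachineModuleInfo, the ThunkInserter CRTP base and its ARM subclass) replaced by simplified stand-ins named Function and ArmSLSInserter, and with the subtarget/hardenSlsBlr checks omitted. It only illustrates how an InsertedThunks value accumulated with |= causes Arm thunks and Thumb thunks to each be emitted at most once per module.

#include <iostream>
#include <vector>

// Mirrors the ArmInsertedThunks idea from ARMSLSHardening.cpp: one enumerator
// per kind of thunk that may be emitted into the current module.
enum ArmInsertedThunks { NoThunks = 0, ArmThunk = 1, ThumbThunk = 2 };

inline ArmInsertedThunks &operator|=(ArmInsertedThunks &X,
                                     ArmInsertedThunks Y) {
  return X = static_cast<ArmInsertedThunks>(X | Y);
}

// Stand-in for MachineFunction: the only property this sketch needs is
// whether the function is compiled as Thumb.
struct Function {
  const char *Name;
  bool IsThumb;
};

// Simplified counterpart of ThunkInserter<Derived, InsertedThunksTy>: the
// inserter remembers which kinds of thunks already exist and consults that
// state before creating more.
struct ArmSLSInserter {
  ArmInsertedThunks Inserted = NoThunks;

  bool mayUseThunk(const Function &F) const {
    // Skip the function if the thunk flavour it needs already exists.
    if ((Inserted & ArmThunk) && !F.IsThumb)
      return false;
    if ((Inserted & ThumbThunk) && F.IsThumb)
      return false;
    return true;
  }

  // Returns which kind of thunk was emitted so the caller can accumulate it,
  // just as run() does with InsertedThunks |= insertThunks(MMI, MF).
  ArmInsertedThunks insertThunks(const Function &F) {
    std::cout << "emitting " << (F.IsThumb ? "Thumb" : "Arm")
              << " thunks while processing " << F.Name << "\n";
    return F.IsThumb ? ThumbThunk : ArmThunk;
  }

  void run(const Function &F) {
    if (!mayUseThunk(F))
      return;
    Inserted |= insertThunks(F);
  }
};

int main() {
  ArmSLSInserter Inserter;
  std::vector<Function> Module = {
      {"test1", /*IsThumb=*/false},
      {"test2", /*IsThumb=*/true},
      {"test3", /*IsThumb=*/false}, // no output: Arm thunks already emitted
  };
  for (const Function &F : Module)
    Inserter.run(F);
  return 0;
}

Compiled with any C++11 compiler, the driver prints one line for test1 (Arm thunks) and one for test2 (Thumb thunks), and nothing for test3 because Arm thunks already exist; this mirrors what the new speculation-hardening-sls-boththunks.ll test checks at the llc level.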