[VirtRegRewriter] Avoid clobbering registers when expanding copy bundles
authorJustin Bogner <mail@justinbogner.com>
Thu, 14 Jun 2018 18:32:55 +0000 (18:32 +0000)
committerJustin Bogner <mail@justinbogner.com>
Thu, 14 Jun 2018 18:32:55 +0000 (18:32 +0000)
If a copy bundle happens to involve overlapping registers, we can end
up with emitting the copies in an order that ends up clobbering some
of the subregisters. Since instructions in the copy bundle
semantically happen at the same time, this is incorrect and we need to
make sure we order the copies such that this doesn't happen.

Differential Revision: https://reviews.llvm.org/D48154

llvm-svn: 334750

llvm/lib/CodeGen/VirtRegMap.cpp
llvm/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir [new file with mode: 0644]
llvm/test/CodeGen/AArch64/overlapping-copy-bundle.mir [new file with mode: 0644]

index 3f9dcd4..2741dbe 100644 (file)
@@ -406,6 +406,8 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
     return;
 
   if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+    SmallVector<MachineInstr *, 2> MIs({&MI});
+
     // Only do this when the complete bundle is made out of COPYs.
     MachineBasicBlock &MBB = *MI.getParent();
     for (MachineBasicBlock::reverse_instr_iterator I =
@@ -413,16 +415,53 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
          I != E && I->isBundledWithSucc(); ++I) {
       if (!I->isCopy())
         return;
+      MIs.push_back(&*I);
+    }
+    MachineInstr *FirstMI = MIs.back();
+
+    auto anyRegsAlias = [](const MachineInstr *Dst,
+                           ArrayRef<MachineInstr *> Srcs,
+                           const TargetRegisterInfo *TRI) {
+      for (const MachineInstr *Src : Srcs)
+        if (Src != Dst)
+          if (TRI->regsOverlap(Dst->getOperand(0).getReg(),
+                               Src->getOperand(1).getReg()))
+            return true;
+      return false;
+    };
+
+    // If any of the destination registers in the bundle of copies alias any of
+    // the source registers, try to schedule the instructions to avoid any
+    // clobbering.
+    for (int E = MIs.size(), PrevE; E > 1; PrevE = E) {
+      for (int I = E; I--; )
+        if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) {
+          if (I + 1 != E)
+            std::swap(MIs[I], MIs[E - 1]);
+          --E;
+        }
+      if (PrevE == E) {
+        MF->getFunction().getContext().emitError(
+            "register rewriting failed: cycle in copy bundle");
+        break;
+      }
     }
 
-    for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
-         I->isBundledWithPred(); ) {
-      MachineInstr &MI = *I;
-      ++I;
+    MachineInstr *BundleStart = FirstMI;
+    for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
+      // If instruction is in the middle of the bundle, move it before the
+      // bundle starts, otherwise, just unbundle it. When we get to the last
+      // instruction, the bundle will have been completely undone.
+      if (BundledMI != BundleStart) {
+        BundledMI->removeFromBundle();
+        MBB.insert(FirstMI, BundledMI);
+      } else if (BundledMI->isBundledWithSucc()) {
+        BundledMI->unbundleFromSucc();
+        BundleStart = &*std::next(BundledMI->getIterator());
+      }
 
-      MI.unbundleFromPred();
-      if (Indexes)
-        Indexes->insertMachineInstrInMaps(MI);
+      if (Indexes && BundledMI != FirstMI)
+        Indexes->insertMachineInstrInMaps(*BundledMI);
     }
   }
 }
diff --git a/llvm/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir b/llvm/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir
new file mode 100644 (file)
index 0000000..093473a
--- /dev/null
@@ -0,0 +1,16 @@
+# RUN: not llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o /dev/null 2>&1 | FileCheck %s
+
+# Check we don't infinitely loop on cycles in copy bundles.
+# CHECK: error: register rewriting failed: cycle in copy bundle
+
+---
+name: func0
+body: |
+  bb.0:
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2_q3 = COPY $q0_q1_q2 {
+      $q2_q3_q4 = COPY $q1_q2_q3
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/overlapping-copy-bundle.mir b/llvm/test/CodeGen/AArch64/overlapping-copy-bundle.mir
new file mode 100644 (file)
index 0000000..84f39ea
--- /dev/null
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s
+---
+name: func0
+body: |
+  bb.0:
+    ; Make sure we don't clobber q3 when we expand the bundle
+    ; CHECK-LABEL: name: func0
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q4 = COPY $q3
+    ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2_q3 = COPY $q0_q1_q2 {
+      $q4 = COPY $q3
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func1
+body: |
+  bb.0:
+    ; If it was already ordered, make sure we don't break it
+    ; CHECK-LABEL: name: func1
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q4 = COPY $q3
+    ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q4 = COPY $q3 {
+      $q1_q2_q3 = COPY $q0_q1_q2
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func2
+body: |
+  bb.0:
+    ; A bit less realistic, but check that we handle multiple nodes
+    ; CHECK-LABEL: name: func2
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q3 = COPY $q2
+    ; CHECK: $q4 = COPY $q1
+    ; CHECK: $q1_q2 = COPY $q0_q1
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2 = COPY $q0_q1 {
+      $q3 = COPY $q2
+      $q4 = COPY $q1
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func3
+body: |
+  bb.0:
+    ; If there was nothing wrong, don't change the order for no reason
+    ; CHECK-LABEL: name: func3
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q1_q2_q3_q4 = IMPLICIT_DEF
+    ; CHECK: $q0_q1 = COPY $q1_q2
+    ; CHECK: $q2_q3 = COPY $q3_q4
+    ; CHECK: ST4i64 $q0_q1_q2_q3, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q1_q2_q3_q4 = IMPLICIT_DEF
+    $q0_q1 = COPY $q1_q2 {
+      $q2_q3 = COPY $q3_q4
+    }
+    ST4i64 $q0_q1_q2_q3, 0, $x0
+
+...