[MachineCSE][NFC]: Refactor and comment on preventing CSE for isConvergent instrs

author Michael Kitzan <mkitzan@apple.com>

Sat, 1 May 2021 02:50:54 +0000 (19:50 -0700)

committer Michael Kitzan <mkitzan@apple.com>

Wed, 5 May 2021 21:22:03 +0000 (14:22 -0700)
author Michael Kitzan <mkitzan@apple.com>
Sat, 1 May 2021 02:50:54 +0000 (19:50 -0700)
committer Michael Kitzan <mkitzan@apple.com>
Wed, 5 May 2021 21:22:03 +0000 (14:22 -0700)
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp

index de90d0f..cb2e18e 100644 (file)
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -433,11 +433,6 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
                                     MachineBasicBlock *CSBB, MachineInstr *MI) {
    // FIXME: Heuristics that works around the lack the live range splitting.
  
-  MachineBasicBlock *BB = MI->getParent();
-  // Prevent CSE-ing non-local convergent instructions.
-  if (MI->isConvergent() && CSBB != BB)
-    return false;
-
    // If CSReg is used at all uses of Reg, CSE should not increase register
    // pressure of CSReg.
    bool MayIncreasePressure = true;
@@ -460,6 +455,7 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
    // an immediate predecessor. We don't want to increase register pressure and
    // end up causing other computation to be spilled.
    if (TII->isAsCheapAsAMove(*MI)) {
+    MachineBasicBlock *BB = MI->getParent();
      if (CSBB != BB && !CSBB->isSuccessor(BB))
        return false;
    }
@@ -592,6 +588,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
      LLVM_DEBUG(dbgs() << "Examining: " << *MI);
      LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
  
+    // Prevent CSE-ing non-local convergent instructions.
+    // LLVM's current definition of `isConvergent` does not necessarily prove
+    // that non-local CSE is illegal. The following check extends the definition
+    // of `isConvergent` to assume a convergent instruction is dependent not
+    // only on additional conditions, but also on fewer conditions. LLVM does
+    // not have a MachineInstr attribute which expresses this extended
+    // definition, so it's necessary to use `isConvergent` to prevent illegally
+    // CSE-ing the subset of `isConvergent` instructions which do fall into this
+    // extended definition.
+    if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
+                           "different BBs, avoid CSE!\n");
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+      continue;
+    }
+
      // Check if it's profitable to perform this CSE.
      bool DoCSE = true;
      unsigned NumDefs = MI->getNumDefs();
@@ -824,6 +837,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
        if (BB != nullptr && BB1 != nullptr &&
            (isPotentiallyReachable(BB1, BB) ||
             isPotentiallyReachable(BB, BB1))) {
+        // The following check extends the definition of `isConvergent` to
+        // assume a convergent instruction is dependent not only on additional
+        // conditions, but also on fewer conditions. LLVM does not have a
+        // MachineInstr attribute which expresses this extended definition, so
+        // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
+        // subset of `isConvergent` instructions which do fall into this
+        // extended definition.
+        if (MI->isConvergent() && CMBB != MBB)
+          continue;
  
          assert(MI->getOperand(0).isDef() &&
                 "First operand of instr with one explicit def must be this def");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir

index f41b759..26d5e5f 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir
@@ -1,9 +1,22 @@
  # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - -run-pass=machine-cse %s | FileCheck %s
  
-# Check that we don't CSE non-local convergent instrs. Otherwise, reusing defs
-# of convergent instrs from different control flow scopes can cause illegal
-# codegen. Previously, the swizzle in bb2 would be CSE-ed in favor of using the
-# swizzle in bb1 despite bb2 being a different control flow scope.
+# LLVM's current definition of `isConvergent` does not necessarily prove that
+# non-local CSE is illegal. The following test extends the definition of
+# `isConvergent` to assume a convergent instruction is dependent not only on
+# additional conditions, but also on fewer conditions. LLVM does not have a
+# MachineInstr attribute which expresses this extended definition, so it's
+# necessary to use `isConvergent` to prevent illegally CSE-ing the subset of
+# `isConvergent` instructions which do fall into this extended definition.
+
+# This is a coverage test for the MachineCSE change. It does not reproduce an
+# actual bug in the AMDGPU backend. The current open source GPU backends as is
+# do not appear to allow a reasonably simple test case that provably and
+# undeniably functionally breaks without the associated MachineCSE changes.
+
+# The test checks that we don't CSE non-local convergent instrs. Otherwise,
+# reusing defs of convergent instrs from different control flow scopes can
+# cause illegal codegen. Previously, the swizzle in bb2 would be CSE-ed in
+# favor of using the swizzle in bb1 despite bb2 being a different BBs.
  
  # CHECK-LABEL: name: no_cse
  # CHECK: bb.1.if.then
author	Michael Kitzan <mkitzan@apple.com>
	Sat, 1 May 2021 02:50:54 +0000 (19:50 -0700)
committer	Michael Kitzan <mkitzan@apple.com>
	Wed, 5 May 2021 21:22:03 +0000 (14:22 -0700)
llvm/lib/CodeGen/MachineCSE.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir		patch \| blob \| history