[AArch64][GlobalISel] Fold G_SHL into TB(N)Z bit calculation

author Jessica Paquette <jpaquette@apple.com>

Mon, 3 Feb 2020 21:35:09 +0000 (13:35 -0800)

committer Jessica Paquette <jpaquette@apple.com>

Mon, 3 Feb 2020 22:27:08 +0000 (14:27 -0800)
author Jessica Paquette <jpaquette@apple.com>
Mon, 3 Feb 2020 21:35:09 +0000 (13:35 -0800)
committer Jessica Paquette <jpaquette@apple.com>
Mon, 3 Feb 2020 22:27:08 +0000 (14:27 -0800)
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

index d51157e..e7d90bf 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -991,7 +991,7 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
  }
  
  /// Return a register which can be used as a bit to test in a TB(N)Z.
-static Register getTestBitReg(Register Reg, uint64_t Bit,
+static Register getTestBitReg(Register Reg, uint64_t &Bit,
                                MachineRegisterInfo &MRI) {
    assert(Reg.isValid() && "Expected valid register!");
    while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
@@ -1031,6 +1031,15 @@ static Register getTestBitReg(Register Reg, uint64_t Bit,
        }
        if (VRegAndVal)
          C = VRegAndVal->Value;
+      break;
+    }
+    case TargetOpcode::G_SHL: {
+      TestReg = MI->getOperand(1).getReg();
+      auto VRegAndVal =
+          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+      if (VRegAndVal)
+        C = VRegAndVal->Value;
+      break;
      }
      }
  
@@ -1049,6 +1058,14 @@ static Register getTestBitReg(Register Reg, uint64_t Bit,
        if ((*C >> Bit) & 1)
          NextReg = TestReg;
        break;
+    case TargetOpcode::G_SHL:
+      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits
+      // in the type of the register.
+      if (*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()) {
+        NextReg = TestReg;
+        Bit = Bit - *C;
+      }
+      break;
      }
  
      // Check if we found anything worth folding.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir

new file mode 100644 (file)

index 0000000..388944c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check folding a G_SHL into a G_BRCOND which has been matched as a TB(N)Z.
+...
+---
+name:            fold_shl
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_shl
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 2, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; tbnz (shl x, 1), 3 == tbnz x, 2
+    %fold_cst:gpr(s64) = G_CONSTANT i64 1
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_1
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; The shift amount (5) is greater than the tested bit (3), so we cannot do the transformation as above.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_cst:gpr64 = MOVi64imm -5
+  ; CHECK:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; Same case as above, except the shift amount is negative, so it wraps around.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 -5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
author	Jessica Paquette <jpaquette@apple.com>
	Mon, 3 Feb 2020 21:35:09 +0000 (13:35 -0800)
committer	Jessica Paquette <jpaquette@apple.com>
	Mon, 3 Feb 2020 22:27:08 +0000 (14:27 -0800)
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir	[new file with mode: 0644]	patch | blob