}
/// Return a register which can be used as a bit to test in a TB(N)Z.
-static Register getTestBitReg(Register Reg, uint64_t Bit,
+static Register getTestBitReg(Register Reg, uint64_t &Bit,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
}
if (VRegAndVal)
C = VRegAndVal->Value;
+ break;
+ }
+ case TargetOpcode::G_SHL: {
+ TestReg = MI->getOperand(1).getReg();
+ auto VRegAndVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (VRegAndVal)
+ C = VRegAndVal->Value;
+ break;
}
}
if ((*C >> Bit) & 1)
NextReg = TestReg;
break;
+ case TargetOpcode::G_SHL:
+ // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
+ // the type of the register.
+ if (*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()) {
+ NextReg = TestReg;
+ Bit = Bit - *C;
+ }
+ break;
}
// Check if we found anything worth folding.
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check folding a G_SHL into a G_BRCOND which has been matched as a TB(N)Z.
+...
+---
+name: fold_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: fold_shl
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64all = COPY $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: TBNZW [[COPY1]], 2, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+
+ ; Positive case: the mask 8 selects bit 3 and the shift amount 1 is <= 3,
+ ; so the shift is folded away and the tested bit index drops by the shift
+ ; amount:
+ ; tbnz (shl x, 1), 3 == tbnz x, 2
+ %fold_cst:gpr(s64) = G_CONSTANT i64 1
+ %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+ %and:gpr(s64) = G_AND %fold_me, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: dont_fold_shl_1
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_shl_1
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %fold_me:gpr64 = UBFMXri %copy, 59, 58
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+
+ ; Negative case: the mask 8 selects bit 3, but the shift amount is 5.
+ ; 5 > 3, so we cannot do the transformation as above. The shift must be
+ ; kept and bit 3 of its result is tested.
+ %fold_cst:gpr(s64) = G_CONSTANT i64 5
+ %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+ %and:gpr(s64) = G_AND %fold_me, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: dont_fold_shl_2
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_shl_2
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %fold_cst:gpr64 = MOVi64imm -5
+ ; CHECK: %fold_me:gpr64 = LSLVXr %copy, %fold_cst
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK: TBNZW [[COPY2]], 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+
+ ; Same case as above, except the shift amount is negative, so subtracting
+ ; it from the bit index would wrap around. The shift must be kept and
+ ; bit 3 of its result is tested.
+ %fold_cst:gpr(s64) = G_CONSTANT i64 -5
+ %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+ %and:gpr(s64) = G_AND %fold_me, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR