}
}
+/// Return a register which can be used as a bit to test in a TB(N)Z.
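+/// For example, given
+///   %ext:gpr(s64) = G_ZEXT %x:gpr(s32)
+/// and a TB(N)Z on %ext, we can test the same bit on %x instead, provided
+/// %ext has no other users.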
+static Register getTestBitReg(Register Reg, MachineRegisterInfo &MRI) {
+ assert(Reg.isValid() && "Expected valid register!");
+ while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
+ unsigned Opc = MI->getOpcode();
+ Register NextReg;
+
+ // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+ if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT)
+ NextReg = MI->getOperand(1).getReg();
+
+ // Did we find something worth folding?
+ if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+ break;
+
+ // NextReg is worth folding. Keep looking.
+ Reg = NextReg;
+ }
+ return Reg;
+}
+
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
 MachineRegisterInfo &MRI = *MIB.getMRI();
- Register TestReg = AndInst->getOperand(1).getReg();

 // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
 // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
 // so folding would be redundant.
 if (Pred != CmpInst::Predicate::ICMP_EQ &&
     Pred != CmpInst::Predicate::ICMP_NE)
   return false;

 auto MaybeBit =
     getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
return false;
+
+ // Try to optimize the TB(N)Z.
uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+ Register TestReg = AndInst->getOperand(1).getReg();
+ TestReg = getTestBitReg(TestReg, MRI);
// Choose the correct TB(N)Z opcode to use.
unsigned Opc = 0;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that G_ANYEXT and G_ZEXT are looked through when we are in a
+# situation where we will emit a TB(N)Z, and that we don't look through an
+# extend which has more than one use.
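+#
+# fold_zext / fold_anyext: a single G_ZEXT / G_ANYEXT between the tested
+# value and the G_AND is folded away, so the TBNZW tests %copy directly.
+# fold_multiple: a G_ZEXT followed by a G_ANYEXT is walked through as well.
+# dont_fold_more_than_one_use: the G_ZEXT has a second use, so it is kept and
+# not looked through.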
+...
+---
+name: fold_zext
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: fold_zext
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: TBNZW %copy, 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %fold_me:gpr(s64) = G_ZEXT %copy(s32)
+ %and:gpr(s64) = G_AND %fold_me, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: fold_anyext
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: fold_anyext
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: TBNZW %copy, 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %fold_me:gpr(s64) = G_ANYEXT %copy(s32)
+ %and:gpr(s64) = G_AND %fold_me, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: fold_multiple
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: fold_multiple
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $h0
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+ ; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+ ; CHECK: TBNZW [[COPY]], 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $h0
+ %copy:gpr(s16) = COPY $h0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %ext1:gpr(s32) = G_ZEXT %copy(s16)
+ %ext2:gpr(s64) = G_ANYEXT %ext1(s32)
+ %and:gpr(s64) = G_AND %ext2, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: dont_fold_more_than_one_use
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
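+# %zext is used again in bb.1, so getTestBitReg must not walk through it and
+# the zero-extend still has to be selected.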
+body: |
+ ; CHECK-LABEL: name: dont_fold_more_than_one_use
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
+ ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+ ; CHECK: TBNZW %copy, 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: $x0 = COPY %zext
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %zext:gpr(s64) = G_ZEXT %copy(s32)
+ %and:gpr(s64) = G_AND %zext, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ $x0 = COPY %zext:gpr(s64)
+ RET_ReallyLR implicit $x0