OpChanged = true;
break;
}
+ case AArch64::RDFFR_PPz: {
+ // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
+ // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
+ // `rdffrs p1.b, p0/z` above.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ NewOp = AArch64::RDFFRS_PPz;
+ OpChanged = true;
+ break;
+ }
default:
// Bail out if we don't recognize the input
return false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- // If the predicate is in a different block (possibly because its been
- // hoisted out), then assume the flags are set in between statements.
- if (Pred->getParent() != PTest->getParent())
+ // If another instruction between Pred and PTest accesses flags, don't remove
+ // the ptest or update the earlier instruction to modify them.
+ if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
return false;
- // If another instruction between the propagation and test sets the
- // flags, don't remove the ptest.
- MachineBasicBlock::iterator I = Pred, E = PTest;
- ++I; // Skip past the predicate op itself.
- for (; I != E; ++I) {
- const MachineInstr &Inst = *I;
-
- // TODO: If the ptest flags are unused, we could still remove it.
- if (Inst.modifiesRegister(AArch64::NZCV, TRI))
- return false;
- }
-
// If we pass all the checks, it's safe to remove the PTEST and use the flags
// as they are prior to PTEST. Sometimes this requires the tested PTEST
// operand to be replaced with an equivalent instruction that also sets the
--- /dev/null
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+# Test that RDFFR followed by PTEST is replaced with RDFFRS.
+---
+# CHECK-LABEL: name:{{\s*}} substitute_rdffr_pp_with_rdffrs_pp
+name: substitute_rdffr_pp_with_rdffrs_pp
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $ffr, $p0
+ %0:ppr_3b = COPY $p0
+
+ ; CHECK: RDFFRS_PPz
+ ; CHECK-NOT: PTEST
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
+
+ ; Consume nzcv
+ %2:gpr32 = COPY $wzr
+ %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
+name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $ffr, $p0, $p1
+ %0:ppr_3b = COPY $p0
+ %1:ppr_3b = COPY $p1
+
+ ; CHECK: RDFFR_PPz
+ ; CHECK: PTEST
+ %2:ppr_3b = RDFFR_PPz %0:ppr_3b
+ PTEST_PP killed %1:ppr_3b, killed %2:ppr_3b, implicit-def $nzcv
+
+ ; Consume nzcv
+ %3:gpr32 = COPY $wzr
+ %4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+...
+---
+# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
+name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $ffr, $p0, $x0
+ %0:ppr_3b = COPY $p0
+
+ ; CHECK: RDFFR_PPz
+ ; CHECK-NEXT: ADDSXrr
+ ; CHECK-NEXT: PTEST_PP
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ ; Clobber nzcv
+ $x0 = ADDSXrr $x0, $x0, implicit-def $nzcv
+ PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
+
+ ; Consume nzcv
+ %2:gpr32 = COPY $wzr
+ %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
+name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $ffr, $p0, $x0
+ %0:ppr_3b = COPY $p0
+
+ $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
+
+ ; CHECK: RDFFR_PPz
+ ; CHECK-NEXT: CSINCWr
+ ; CHECK-NEXT: PTEST_PP
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ ; Consume nzcv
+ %2:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+ PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
+
+ ; Consume nzcv
+ %3:gpr32 = COPY $wzr
+ %4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
+ $w0 = ORRWrs %4, %2, 1
+ RET_ReallyLR implicit $w0