From c73ae747cb0c5546ac4a91a9609d5407e5b03897 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Mon, 14 Dec 2020 15:31:50 +0000 Subject: [PATCH] [AArch64][SVE] Add optimization to remove redundant ptest instructions Co-Authored-by: Graham Hunter Co-Authored-by: Paul Walker Differential Revision: https://reviews.llvm.org/D93292 --- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 4 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 139 ++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 30 ++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 11 + llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll | 79 +++ .../CodeGen/AArch64/sve-ptest-removal-cmpeq.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-cmpeq.mir | 534 +++++++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-cmpge.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-cmpgt.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-cmphi.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-cmphs.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-cmple.ll | 39 ++ .../CodeGen/AArch64/sve-ptest-removal-cmplo.ll | 39 ++ .../CodeGen/AArch64/sve-ptest-removal-cmpls.ll | 39 ++ .../CodeGen/AArch64/sve-ptest-removal-cmplt.ll | 39 ++ .../CodeGen/AArch64/sve-ptest-removal-cmpne.ll | 54 +++ .../CodeGen/AArch64/sve-ptest-removal-whilege.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilegt.mir | 475 ++++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilehi.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilehs.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilele.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilelo.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilels.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilelt.mir | 444 +++++++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilerw.mir | 306 ++++++++++++ .../CodeGen/AArch64/sve-ptest-removal-whilewr.mir | 306 ++++++++++++ 
llvm/test/CodeGen/AArch64/sve-setcc.ll | 1 - 27 files changed, 5472 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 3335071..cf08f56 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -60,10 +60,14 @@ class AArch64Inst : 
Instruction { bits<2> Form = F.Value; // Defaults + bit isWhile = 0; + bit isPTestLike = 0; FalseLanesEnum FalseLanes = FalseLanesNone; DestructiveInstTypeEnum DestructiveInstType = NotDestructive; ElementSizeEnum ElementSize = ElementSizeNone; + let TSFlags{10} = isPTestLike; + let TSFlags{9} = isWhile; let TSFlags{8-7} = FalseLanes.Value; let TSFlags{6-3} = DestructiveInstType.Value; let TSFlags{2-0} = ElementSize.Value; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index ecd0c07..3dbda0d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1119,6 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, switch (MI.getOpcode()) { default: break; + case AArch64::PTEST_PP: + SrcReg = MI.getOperand(0).getReg(); + SrcReg2 = MI.getOperand(1).getReg(); + // Not sure about the mask and value for now... + CmpMask = ~0; + CmpValue = 0; + return true; case AArch64::SUBSWrr: case AArch64::SUBSWrs: case AArch64::SUBSWrx: @@ -1290,6 +1297,136 @@ static bool areCFlagsAccessedBetweenInstrs( return false; } +/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating +/// operation which could set the flags in an identical manner. +/// +/// \p PTest is the PTEST_PP under consideration; \p MaskReg and \p PredReg are +/// the virtual registers of its mask and tested-predicate operands. Returns +/// true if PTest was erased, in which case the instruction defining PredReg +/// (switched to its flag-setting opcode where necessary) provides the flags; +/// returns false, leaving the code untouched, otherwise. +bool AArch64InstrInfo::optimizePTestInstr( + MachineInstr *PTest, unsigned MaskReg, unsigned PredReg, + const MachineRegisterInfo *MRI) const { + auto *Mask = MRI->getUniqueVRegDef(MaskReg); + auto *Pred = MRI->getUniqueVRegDef(PredReg); + auto NewOp = Pred->getOpcode(); + bool OpChanged = false; + + unsigned MaskOpcode = Mask->getOpcode(); + unsigned PredOpcode = Pred->getOpcode(); + bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode); + bool PredIsWhileLike = isWhileOpcode(PredOpcode); + + if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't + // deactivate any lanes OTHER_INST might set.
+ uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); + uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); + + // Must be an all active predicate of matching element size. + if ((PredElementSize != MaskElementSize) || + (Mask->getOperand(1).getImm() != 31)) + return false; + + // Fallthrough to simply remove the PTEST. + } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PG, PG), PTEST is redundant when PG is the result of an + // instruction that sets the flags as PTEST would. + + // Fallthrough to simply remove the PTEST. + } else if (PredIsPTestLike) { + // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both + // instructions use the same predicate. + auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PTestLikeMask) + return false; + + // Fallthrough to simply remove the PTEST. + } else { + switch (Pred->getOpcode()) { + case AArch64::BRKB_PPzP: + case AArch64::BRKPB_PPzPP: { + // Operand 0 is the result predicate and operand 1 the mask + // (governing predicate); the source predicate(s) follow. + + // Check to see if our mask is the same as the brkpb's. If + // not the resulting flag bits may be different and we + // can't remove the ptest. + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + // Switch to the new opcode + NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ?
AArch64::BRKBS_PPzP + : AArch64::BRKPBS_PPzPP; + OpChanged = true; + break; + } + case AArch64::BRKN_PPzP: { + // NOTE(review): BRKNS is believed to set the flags using an implicit + // all-active predicate rather than PG; confirm against the Arm SVE + // specification that comparing Mask with PredMask is sufficient here. + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + NewOp = AArch64::BRKNS_PPzP; + OpChanged = true; + break; + } + default: + // Bail out if we don't recognize the input + return false; + } + } + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // If the predicate is in a different block (possibly because it's been + // hoisted out), then assume the flags are set in between statements. + if (Pred->getParent() != PTest->getParent()) + return false; + + // Bail if the flags are set between Pred and PTest, or are read there while + // Pred must be rewritten to a flag-setting opcode (it would clobber them). + MachineBasicBlock::iterator I = Pred, E = PTest; + ++I; // Skip past the predicate op itself. + for (; I != E; ++I) { + const MachineInstr &Inst = *I; + // TODO: If the ptest flags are unused, we could still remove it. + if (Inst.modifiesRegister(AArch64::NZCV, TRI) || + (OpChanged && Inst.readsRegister(AArch64::NZCV, TRI))) + return false; + } + + // If we pass all the checks, it's safe to remove the PTEST and use the flags + // as they are prior to PTEST. Sometimes this requires the tested PTEST + // operand to be replaced with an equivalent instruction that also sets the + // flags. + Pred->setDesc(get(NewOp)); + PTest->eraseFromParent(); + if (OpChanged) { + bool succeeded = UpdateOperandRegClass(*Pred); + (void)succeeded; + assert(succeeded && "Operands have incompatible register classes!"); + Pred->addRegisterDefined(AArch64::NZCV, TRI); + } + + // Ensure that the flags def is live. + if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) { + unsigned i = 0, e = Pred->getNumOperands(); + for (; i != e; ++i) { + MachineOperand &MO = Pred->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) { + MO.setIsDead(false); + break; + } + } + } + return true; +} + /// Try to optimize a compare instruction.
A compare instruction is an /// instruction which produces AArch64::NZCV. It can be truly compare /// instruction @@ -1328,6 +1456,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } + if (CmpInstr.getOpcode() == AArch64::PTEST_PP) + return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); + // Continue only if we have a "ri" where immediate is zero. // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare // function. @@ -7042,6 +7173,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { return get(Opc).TSFlags & AArch64::ElementSizeMask; } +bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; +} + +bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; +} + unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { if (MF.getSubtarget().hardenSlsBlr()) return AArch64::BLRNoIP; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index b45ae92..7434987 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -280,6 +280,12 @@ public: bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; + /// Returns true if the opcode is for an SVE instruction that sets the + /// condition codes as if its results had been fed to a PTEST instruction + /// along with the same general predicate. + bool isPTestLikeOpcode(unsigned Opc) const; + /// Returns true if the opcode is for an SVE WHILE## instruction. + bool isWhileOpcode(unsigned Opc) const; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI); @@ -328,6 +334,12 @@ private: /// Returns an unused general-purpose register which can be used for /// constructing an outlined call if one exists. Returns 0 otherwise. unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + + /// Remove a ptest of a predicate-generating operation that already sets, or + /// can be made to set, the condition codes in an identical manner. + bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg, + unsigned PredReg, + const MachineRegisterInfo *MRI) const; }; /// Return true if there is an instruction /after/ \p DefMI and before \p UseMI @@ -411,6 +423,18 @@ static inline bool isIndirectBranchOpcode(int Opc) { return false; } +static inline bool isPTrueOpcode(unsigned Opc) { + switch (Opc) { + case AArch64::PTRUE_B: + case AArch64::PTRUE_H: + case AArch64::PTRUE_S: + case AArch64::PTRUE_D: + return true; + default: + return false; + } +} + /// Return opcode to be used for indirect calls. unsigned getBLRCallOpcode(const MachineFunction &MF); @@ -418,6 +442,7 @@ unsigned getBLRCallOpcode(const MachineFunction &MF); #define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits #define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit #define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits +#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits // } namespace AArch64 { @@ -450,9 +475,14 @@ enum FalseLaneType { FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2), }; +// NOTE: This is a bit field; each flag below occupies its own bit.
+static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1); +static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2); + #undef TSFLAG_ELEMENT_SIZE_TYPE #undef TSFLAG_DESTRUCTIVE_INST_TYPE #undef TSFLAG_FALSE_LANE_TYPE +#undef TSFLAG_INSTR_FLAGS int getSVEPseudoMap(uint16_t Opcode); int getSVERevInstr(uint16_t Opcode); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8208eb4..1020a81 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -280,6 +280,7 @@ class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let Inst{3-0} = Pd; let Defs = !if(!eq (opc{0}, 1), [NZCV], []); + let ElementSize = pprty.ElementSize; let isReMaterializable = 1; } @@ -532,6 +533,7 @@ class sve_int_ptest opc, string asm> let Inst{4-0} = 0b00000; let Defs = [NZCV]; + let isCompare = 1; } class sve_int_pfirst_next sz8_64, bits<5> opc, string asm, @@ -4285,6 +4287,8 @@ class sve_int_cmp sz8_64, bits<3> opc, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; + let ElementSize = pprty.ElementSize; + let isPTestLike = 1; } multiclass SVE_SETCC_Pat sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let Defs = [NZCV]; let ElementSize = pprty.ElementSize; + let isPTestLike = 1; } multiclass SVE_SETCC_Imm_Pat sz8_64, bits<2> opc, string asm, PPRRegOp pprty, let Inst{3-0} = Pd; let Defs = [NZCV]; + let ElementSize = pprty.ElementSize; + let isPTestLike = 1; } multiclass sve_int_ucmp_vi opc, string asm, CondCode cc, @@ -4478,6 +4485,8 @@ class sve_int_while_rr sz8_64, bits<4> opc, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; + let ElementSize = pprty.ElementSize; + let isWhile = 1; } multiclass sve_int_while4_rr opc, string asm, SDPatternOperator op> { @@ -4522,6 +4531,8 @@ class sve2_int_while_rr sz8_64, bits<1> rw, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; + let ElementSize = pprty.ElementSize; + let isWhile = 1; } multiclass sve2_int_while_rr 
rw, string asm, string op> { diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll new file mode 100644 index 0000000..a593b98 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll @@ -0,0 +1,79 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; Test that redundant ptest instruction is removed when using a flag setting brk + +define i32 @brkpb( %pg, %a, %b) { +; CHECK-LABEL: brkpb: +; CHECK: brkpbs p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @brkb( %pg, %a) { +; CHECK-LABEL: brkb: +; CHECK: brkbs p0.b, p0/z, p1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @brkn( %pg, %a, %b) { +; CHECK-LABEL: brkn: +; CHECK: brkns p2.b, p0/z, p1.b, p2.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkn.z.nxv16i1( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; Test that ptest instruction is not removed when using a non-flag setting brk + +define i32 @brkpb_neg( %pg, %a, %b) { +; CHECK-LABEL: brkpb_neg: +; CHECK: brkpb p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %a, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @brkb_neg( %pg, %a) { +; CHECK-LABEL: brkb_neg: +; CHECK: brkb p0.b, p0/z, p1.b +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret 
+ %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %a, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @brkn_neg( %pg, %a, %b) { +; CHECK-LABEL: brkn_neg: +; CHECK: brkn p2.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ptest p1, p2.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkn.z.nxv16i1( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %a, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.brkpb.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.brkb.z.nxv16i1(, ) +declare @llvm.aarch64.sve.brkn.z.nxv16i1(, , ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll new file mode 100644 index 0000000..157a73b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Compares +; + +define i32 @cmpeq_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpeq_nxv16i8: +; CHECK: cmpeq p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmpeq_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmpeq_imm_nxv16i8: +; CHECK: cmpeq p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmpeq_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpeq_wide_nxv16i8: +; CHECK: cmpeq p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: 
cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpeq.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpeq.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmpeq.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir new file mode 100644 index 0000000..b414111 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -0,0 +1,534 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed. +--- +name: cmpeq_nxv16i8 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: cmpeq_nxv16i8 + ; CHECK: %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_nxv8i16 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr_3b } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_nxv8i16 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %4:ppr = CMPEQ_PPzZZ_H %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_nxv4i32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr_3b } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_nxv4i32 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %4:ppr = CMPEQ_PPzZZ_S %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_nxv2i64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr_3b } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_nxv2i64 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %4:ppr = CMPEQ_PPzZZ_D %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_imm_nxv16i8 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv16i8 + ; CHECK-NOT: PTEST + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + %3:ppr = PTRUE_B 31 + PTEST_PP killed %3, killed %2, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_imm_nxv8i16 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv8i16 + ; CHECK-NOT: PTEST + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_H %0, %1, 0, implicit-def dead $nzcv + PTEST_PP %0, %2, implicit-def $nzcv + %5:gpr32 = COPY $wzr + %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_imm_nxv4i32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv4i32 + ; CHECK-NOT: PTEST + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_S %0, %1, 0, implicit-def dead $nzcv + PTEST_PP %0, %2, implicit-def $nzcv + %5:gpr32 = COPY $wzr + %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_imm_nxv2i64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv2i64 + ; CHECK-NOT: PTEST + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_D %0, %1, 0, implicit-def dead $nzcv + PTEST_PP %0, %2, implicit-def $nzcv + %5:gpr32 = COPY $wzr + %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_wide_nxv16i8 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_wide_nxv16i8 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %3:ppr = CMPEQ_WIDE_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_wide_nxv8i16 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr_3b } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_wide_nxv8i16 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %4:ppr = CMPEQ_WIDE_PPzZZ_H %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_wide_nxv4i32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr_3b } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_wide_nxv4i32 + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %4:ppr = CMPEQ_WIDE_PPzZZ_S %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %0, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_imm_nxv16i8_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_not_all_active + ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + ; CHECK-NEXT: %3:ppr = PTRUE_B 0 + ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + %3:ppr = PTRUE_B 0 + PTEST_PP killed %3, killed %2, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_imm_nxv16i8_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_of_halfs + ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + ; CHECK-NEXT: %3:ppr = PTRUE_H 31 + ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + %3:ppr = PTRUE_H 31 + PTEST_PP killed %3, killed %2, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: cmpeq_imm_nxv16i8_ptest_with_unique_pg +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$p1', virtual-reg: '%3' } + - { reg: '$z0', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $p1, $z0 + + ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_with_unique_pg + ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + ; CHECK-NEXT: %3:ppr = COPY $p1 + ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv + %3:ppr = COPY $p1 + PTEST_PP killed %3, killed %2, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: cmpeq_nxv16i8_ptest_with_matching_operands +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: ppr_3b } + - { id: 1, class: zpr } + - { id: 2, class: zpr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$p0', virtual-reg: '%0' } + - { reg: '$z0', virtual-reg: '%1' } + - { reg: '$z1', virtual-reg: '%2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $p0, $z0, $z1 + + ; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_matching_operands + ; CHECK-NOT: PTEST + %2:zpr = COPY $z1 + %1:zpr = COPY $z0 + %0:ppr_3b = COPY $p0 + %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv + PTEST_PP %3, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll new file mode 100644 index 0000000..6363c3d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Compares +; + +define i32 @cmpge_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpge_nxv16i8: +; CHECK: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmpge_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmpge_imm_nxv16i8: +; CHECK: cmpge p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmpge_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpge_wide_nxv16i8: +; CHECK: cmpge p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpge.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpge.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmpge.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll new file mode 100644 index 0000000..4d3c7e0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck 
%s + +; +; Compares +; + +define i32 @cmpgt_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpgt_nxv16i8: +; CHECK: cmpgt p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmpgt_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmpgt_imm_nxv16i8: +; CHECK: cmpgt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmpgt_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpgt_wide_nxv16i8: +; CHECK: cmpgt p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpgt.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpgt.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmpgt.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll new file mode 100644 index 0000000..5bba0b4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Compares +; + +define i32 @cmphi_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmphi_nxv16i8: +; CHECK: cmphi p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 
to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmphi_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmphi_imm_nxv16i8: +; CHECK: cmphi p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmphi_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmphi_wide_nxv16i8: +; CHECK: cmphi p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphi.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmphi.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmphi.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll new file mode 100644 index 0000000..ff5a1ec --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Compares +; + +define i32 @cmphs_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmphs_nxv16i8: +; CHECK: cmphs p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmphs_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmphs_imm_nxv16i8: +; CHECK: cmphs p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call 
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmphs_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmphs_wide_nxv16i8: +; CHECK: cmphs p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphs.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmphs.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmphs.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll new file mode 100644 index 0000000..3513ace --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Immediate Compares +; + +define i32 @cmple_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmple_imm_nxv16i8: +; CHECK: cmple p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, zeroinitializer, %a) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmple_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmple_wide_nxv16i8: +; CHECK: cmple p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmple.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpge.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmple.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare 
@llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll new file mode 100644 index 0000000..eae748d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Immediate Compares +; + +define i32 @cmplo_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmplo_imm_nxv16i8: +; CHECK: cmplo p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, zeroinitializer, %a) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmplo_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmplo_wide_nxv16i8: +; CHECK: cmplo p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmplo.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmphi.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmplo.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll new file mode 100644 index 0000000..d53ece9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Immediate Compares +; + +define i32 @cmpls_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmpls_imm_nxv16i8: +; CHECK: cmpls p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, zeroinitializer, %a) + %2 = tail call 
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmpls_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpls_wide_nxv16i8: +; CHECK: cmpls p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpls.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmphs.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmpls.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll new file mode 100644 index 0000000..fca33d7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Immediate Compares +; + +define i32 @cmplt_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmplt_imm_nxv16i8: +; CHECK: cmplt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, zeroinitializer, %a) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmplt_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmplt_wide_nxv16i8: +; CHECK: cmplt p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmplt.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpgt.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmplt.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare 
@llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll new file mode 100644 index 0000000..ead20da --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + +; +; Compares +; + +define i32 @cmpne_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpne_nxv16i8: +; CHECK: cmpne p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +; +; Immediate Compares +; + +define i32 @cmpne_imm_nxv16i8( %pg, %a) { +; CHECK-LABEL: cmpne_imm_nxv16i8: +; CHECK: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, zeroinitializer) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; +; Wide Compares +; + +define i32 @cmpne_wide_nxv16i8( %pg, %a, %b) { +; CHECK-LABEL: cmpne_wide_nxv16i8: +; CHECK: cmpne p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.cmpne.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmpne.wide.nxv16i8(, , ) + +declare i1 @llvm.aarch64.sve.ptest.any(, ) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir new file mode 100644 index 0000000..eaec3fb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir @@ -0,0 +1,444 @@ +# RUN: llc 
-mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed. +--- +name: whilege_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilege_b8_s32 + ; CHECK: %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilege_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilege_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilege_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEGE_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilege_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEGE_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilege_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilege_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEGE_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilege_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEGE_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilege_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilege_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEGE_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilege_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEGE_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilege_b8_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 0 + %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b8_s64_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. 
+ ; + ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilege_b8_s64_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_words + ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilege_b8_s64_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir new file mode 100644 index 0000000..c48df41 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir @@ -0,0 +1,475 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed. 
+--- +name: whilegt_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilegt_b8_s32 + ; CHECK: %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilegt_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEGT_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilegt_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilegt_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEGT_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilegt_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilegt_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilegt_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEGT_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilegt_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEGT_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilegt_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilegt_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEGT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilegt_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEGT_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilegt_b16_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 1 + %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b16_s64_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGT, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilegt_b16_s64_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_words + ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilegt_b16_s64_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEGT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilegt_b8_s32_ptest_with_matching_operands +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilegt_b8_s32_ptest_with_matching_operands + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP %2, killed %2, implicit-def $nzcv + %3:gpr32 = COPY $wzr + %4:gpr32 = CSINCWr %3, $wzr, 0, implicit $nzcv + $w0 = COPY %4 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir new file mode 100644 index 0000000..f186e82 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed. +--- +name: whilehi_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. 
+ ; + ; CHECK-LABEL: name: whilehi_b8_s32 + ; CHECK: %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehi_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHI_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehi_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehi_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEHI_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehi_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEHI_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehi_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehi_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEHI_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehi_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehi_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehi_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEHI_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehi_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEHI_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehi_b32_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHI, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 29 + %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b32_s64_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHI, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehi_b32_s64_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHI, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilehi_b32_s64_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHI, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir new file mode 100644 index 0000000..1c35524 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilehs_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilehs_b8_s32 + ; CHECK: %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehs_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHS_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehs_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehs_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEHS_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehs_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEHS_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehs_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehs_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEHS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehs_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEHS_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehs_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilehs_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEHS_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilehs_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilehs_b64_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 30 + %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b64_s64_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHS, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilehs_b64_s64_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilehs_b64_s64_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEHS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_words + ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir new file mode 100644 index 0000000..32954d5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilele_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilele_b8_s32 + ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilele_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELE_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilele_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilele_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELE_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilele_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELE_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilele_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilele_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELE_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilele_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELE_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilele_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilele_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELE_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilele_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELE_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilele_b8_s32_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 7 + %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b8_s32_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELE, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilele_b8_s32_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_words + ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilele_b8_s32_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELE, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir new file mode 100644 index 0000000..cca0ab8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilelo_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the full expected sequence; the subsequent tests + ; just assert that there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilelo_b8_s32 + ; CHECK: %3:ppr = WHILELO_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELO_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelo_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELO_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilelo_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelo_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelo_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELO_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelo_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelo_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELO_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelo_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELO_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelo_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelo_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELO_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelo_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELO_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelo_b16_s32_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELO, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 6 + %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b16_s32_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELO, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelo_b16_s32_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELO, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_words + ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilelo_b16_s32_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELO, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir new file mode 100644 index 0000000..4bae3a1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilels_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the full expected sequence; the subsequent tests + ; just assert that there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilels_b8_s32 + ; CHECK: %3:ppr = WHILELS_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELS_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilels_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELS_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilels_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilels_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELS_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilels_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELS_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilels_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilels_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilels_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELS_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilels_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilels_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELS_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilels_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELS_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilels_b32_s32_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 5 + %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b32_s32_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELS, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilels_b32_s32_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilels_b32_s32_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELS, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir new file mode 100644 index 0000000..3c6a9e21 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir @@ -0,0 +1,444 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilelt_b8_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilelt_b8_s32 + ; CHECK: %3:ppr = WHILELT_PWW_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELT_PWW_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelt_b8_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELT_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelt_b16_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelt_b16_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELT_PWW_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelt_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILELT_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelt_b32_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelt_b32_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELT_PWW_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelt_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILELT_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelt_b64_s32 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: whilelt_b64_s32 + ; CHECK-NOT: PTEST + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilelt_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILELT_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilelt_b64_s32_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_D 4 + %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b64_s32_keep_ptest_of_bytes +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELT, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_bytes + ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilelt_b64_s32_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilelt_b64_s32_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $w0, $w1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILELT, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_words + ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr32 = COPY $w1 + %0:gpr32 = COPY $w0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir new file mode 100644 index 0000000..7b67f8d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir @@ -0,0 +1,306 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilerw_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilerw_b8_s64 + ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilerw_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilerw_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILERW_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilerw_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilerw_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILERW_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilerw_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilerw_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILERW_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilerw_b8_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILERW, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 0 + %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilerw_b8_s64_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILERW, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilerw_b8_s64_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILERW, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_words + ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilerw_b8_s64_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILERW, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir new file mode 100644 index 0000000..ebf8e07 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir @@ -0,0 +1,306 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s + +# Test instruction sequences where PTEST is redundant and thus gets removed.
+--- +name: whilewr_b8_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Here we check the expected sequence with subsequent tests + ; just asserting there is no PTEST instruction. + ; + ; CHECK-LABEL: name: whilewr_b8_s64 + ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 31 + %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilewr_b16_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilewr_b16_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %4:ppr = WHILEWR_PXX_H %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilewr_b32_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilewr_b32_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %4:ppr = WHILEWR_PXX_S %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... +--- +name: whilewr_b64_s64 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: ppr } + - { id: 5, class: ppr } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: whilewr_b64_s64 + ; CHECK-NOT: PTEST + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %4:ppr = WHILEWR_PXX_D %0, %1, implicit-def dead $nzcv + PTEST_PP %2, %4, implicit-def $nzcv + %6:gpr32 = COPY $wzr + %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv + $w0 = COPY %7 + RET_ReallyLR implicit $w0 + +... 
+--- +name: whilewr_b8_s64_keep_ptest_not_all_active +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEWR, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_not_all_active + ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_B 0 + %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilewr_b8_s64_keep_ptest_of_halfs +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEWR, which is the implicit predicate + ; used by WHILE when calculating the condition codes.
+ ; + ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_halfs + ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_H 31 + %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: whilewr_b8_s64_keep_ptest_of_words +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEWR, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_words + ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_S 31 + %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +...
+--- +name: whilewr_b8_s64_keep_ptest_of_doublewords +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: ppr } + - { id: 3, class: ppr } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; PTEST is not redundant when its Pg operand is not an all active predicate + ; of element size matching the WHILEWR, which is the implicit predicate + ; used by WHILE when calculating the condition codes. + ; + ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_doublewords + ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv + ; CHECK-NEXT: %4:gpr32 = COPY $wzr + ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + %1:gpr64 = COPY $x1 + %0:gpr64 = COPY $x0 + %2:ppr = PTRUE_D 31 + %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv + PTEST_PP killed %2, killed %3, implicit-def $nzcv + %4:gpr32 = COPY $wzr + %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll index 3dbe0eb..c82771d 100644 --- a/llvm/test/CodeGen/AArch64/sve-setcc.ll +++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll @@ -8,7 +8,6 @@ define void @sve_cmplt_setcc_inverted(* %out, %in, %pg) { ; CHECK-LABEL: @sve_cmplt_setcc_inverted ; CHECK: cmplt p1.h, p0/z, z0.h, #0 -; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: b.ne entry: %0 = tail call @llvm.aarch64.sve.cmplt.wide.nxv8i16( %pg, %in, zeroinitializer) -- 2.7.4