private:
MachineInstr *getOperandDef(const MachineOperand &MO);
- MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
+ MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting,
+ bool Is64Bit);
MachineInstr *convertToCondBr(MachineInstr &MI);
bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
};
}
MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
- bool IsFlagSetting) {
+ bool IsFlagSetting,
+ bool Is64Bit) {
// If this is already the flag setting version of the instruction (e.g., SUBS)
// just make sure the implicit-def of NZCV isn't marked dead.
if (IsFlagSetting) {
MO.setIsDead(false);
return &MI;
}
- bool Is64Bit;
- unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
+ unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode());
Register NewDestReg = MI.getOperand(0).getReg();
if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(MI.print(dbgs()));
- NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/false);
NewBr = convertToCondBr(MI);
break;
}
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(MI.print(dbgs()));
- NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/true);
NewBr = convertToCondBr(MI);
break;
}
// Fallthrough to simply remove the PTEST.
} else {
- switch (Pred->getOpcode()) {
+ // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
+ // opcode so the PTEST becomes redundant.
+ switch (PredOpcode) {
+ case AArch64::AND_PPzPP:
+ case AArch64::BIC_PPzPP:
+ case AArch64::EOR_PPzPP:
+ case AArch64::NAND_PPzPP:
+ case AArch64::NOR_PPzPP:
+ case AArch64::ORN_PPzPP:
+ case AArch64::ORR_PPzPP:
+ case AArch64::BRKA_PPzP:
+ case AArch64::BRKPA_PPzPP:
case AArch64::BRKB_PPzP:
- case AArch64::BRKPB_PPzPP: {
- // Op 0 is chain, 1 is the mask, 2 the previous predicate to
- // propagate, 3 the new predicate.
-
- // Check to see if our mask is the same as the brkpb's. If
- // not the resulting flag bits may be different and we
- // can't remove the ptest.
+ case AArch64::BRKPB_PPzPP:
+ case AArch64::RDFFR_PPz: {
+ // Check to see if our mask is the same. If not the resulting flag bits
+ // may be different and we can't remove the ptest.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
-
- // Switch to the new opcode
- NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
- : AArch64::BRKPBS_PPzPP;
- OpChanged = true;
break;
}
case AArch64::BRKN_PPzP: {
+ // BRKN uses an all active implicit mask to set flags unlike the other
+ // flag-setting instructions.
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
if ((MaskOpcode != AArch64::PTRUE_B) ||
(Mask->getOperand(1).getImm() != 31))
return false;
-
- NewOp = AArch64::BRKNS_PPzP;
- OpChanged = true;
break;
}
- case AArch64::RDFFR_PPz: {
- // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
- // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
- // `rdffrs p1.b, p0/z` above.
- auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PredMask)
- return false;
-
- NewOp = AArch64::RDFFRS_PPz;
- OpChanged = true;
+ case AArch64::PTRUE_B:
+ // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
break;
- }
default:
// Bail out if we don't recognize the input
return false;
}
+
+ NewOp = convertToFlagSettingOpc(PredOpcode);
+ OpChanged = true;
}
const TargetRegisterInfo *TRI = &getRegisterInfo();
}
}
-unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
-                                                   bool &Is64Bit) {
+// Return the flag-setting (NZCV-defining) variant of \p Opc, e.g.
+// ADDWri -> ADDSWri or AND_PPzPP -> ANDS_PPzPP. Covers the base
+// 32-/64-bit ALU opcodes and the SVE predicate-generating opcodes
+// listed below. The caller is responsible for only passing opcodes
+// that have a flag-setting form; any other opcode is unreachable.
+unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
case AArch64::ADDWri:
- Is64Bit = false;
return AArch64::ADDSWri;
case AArch64::ADDWrr:
- Is64Bit = false;
return AArch64::ADDSWrr;
case AArch64::ADDWrs:
- Is64Bit = false;
return AArch64::ADDSWrs;
case AArch64::ADDWrx:
- Is64Bit = false;
return AArch64::ADDSWrx;
case AArch64::ANDWri:
- Is64Bit = false;
return AArch64::ANDSWri;
case AArch64::ANDWrr:
- Is64Bit = false;
return AArch64::ANDSWrr;
case AArch64::ANDWrs:
- Is64Bit = false;
return AArch64::ANDSWrs;
case AArch64::BICWrr:
- Is64Bit = false;
return AArch64::BICSWrr;
case AArch64::BICWrs:
- Is64Bit = false;
return AArch64::BICSWrs;
case AArch64::SUBWri:
- Is64Bit = false;
return AArch64::SUBSWri;
case AArch64::SUBWrr:
- Is64Bit = false;
return AArch64::SUBSWrr;
case AArch64::SUBWrs:
- Is64Bit = false;
return AArch64::SUBSWrs;
case AArch64::SUBWrx:
- Is64Bit = false;
return AArch64::SUBSWrx;
// 64-bit cases:
case AArch64::ADDXri:
- Is64Bit = true;
return AArch64::ADDSXri;
case AArch64::ADDXrr:
- Is64Bit = true;
return AArch64::ADDSXrr;
case AArch64::ADDXrs:
- Is64Bit = true;
return AArch64::ADDSXrs;
case AArch64::ADDXrx:
- Is64Bit = true;
return AArch64::ADDSXrx;
case AArch64::ANDXri:
- Is64Bit = true;
return AArch64::ANDSXri;
case AArch64::ANDXrr:
- Is64Bit = true;
return AArch64::ANDSXrr;
case AArch64::ANDXrs:
- Is64Bit = true;
return AArch64::ANDSXrs;
case AArch64::BICXrr:
- Is64Bit = true;
return AArch64::BICSXrr;
case AArch64::BICXrs:
- Is64Bit = true;
return AArch64::BICSXrs;
case AArch64::SUBXri:
- Is64Bit = true;
return AArch64::SUBSXri;
case AArch64::SUBXrr:
- Is64Bit = true;
return AArch64::SUBSXrr;
case AArch64::SUBXrs:
- Is64Bit = true;
return AArch64::SUBSXrs;
case AArch64::SUBXrx:
- Is64Bit = true;
return AArch64::SUBSXrx;
+ // SVE instructions:
+ case AArch64::AND_PPzPP:
+ return AArch64::ANDS_PPzPP;
+ case AArch64::BIC_PPzPP:
+ return AArch64::BICS_PPzPP;
+ case AArch64::EOR_PPzPP:
+ return AArch64::EORS_PPzPP;
+ case AArch64::NAND_PPzPP:
+ return AArch64::NANDS_PPzPP;
+ case AArch64::NOR_PPzPP:
+ return AArch64::NORS_PPzPP;
+ case AArch64::ORN_PPzPP:
+ return AArch64::ORNS_PPzPP;
+ case AArch64::ORR_PPzPP:
+ return AArch64::ORRS_PPzPP;
+ case AArch64::BRKA_PPzP:
+ return AArch64::BRKAS_PPzP;
+ case AArch64::BRKPA_PPzPP:
+ return AArch64::BRKPAS_PPzPP;
+ case AArch64::BRKB_PPzP:
+ return AArch64::BRKBS_PPzP;
+ case AArch64::BRKPB_PPzPP:
+ return AArch64::BRKPBS_PPzPP;
+ case AArch64::BRKN_PPzP:
+ return AArch64::BRKNS_PPzP;
+ case AArch64::RDFFR_PPz:
+ return AArch64::RDFFRS_PPz;
+ case AArch64::PTRUE_B:
+ return AArch64::PTRUES_B;
}
}
/// Return the opcode that set flags when possible. The caller is
/// responsible for ensuring the opc has a flag setting equivalent.
- static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit);
+ static unsigned convertToFlagSettingOpc(unsigned Opc);
/// Return true if this is a load/store that can be potentially paired/merged.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
; CHECK-LABEL: reduce_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
; CHECK-LABEL: reduce_and_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
; CHECK-LABEL: reduce_and_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
; CHECK-LABEL: reduce_and_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
; CHECK-LABEL: reduce_smax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
; CHECK-LABEL: reduce_smax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
; CHECK-LABEL: reduce_smax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
; CHECK-LABEL: reduce_smax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
; CHECK-LABEL: reduce_umin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
; CHECK-LABEL: reduce_umin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
; CHECK-LABEL: reduce_umin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
; CHECK-LABEL: reduce_umin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
define i32 @brkpa(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: brkpa:
; CHECK: // %bb.0:
-; CHECK-NEXT: brkpa p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: brkpas p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i32 @brka(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: brka:
; CHECK: // %bb.0:
-; CHECK-NEXT: brka p1.b, p0/z, p1.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: brkas p0.b, p0/z, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
ret i32 %conv
}
-define i32 @brkn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: brkn:
-; CHECK: // %bb.0:
-; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p2.b
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
- %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
- %conv = zext i1 %2 to i32
- ret i32 %conv
-}
-
define i32 @brkn_all_active(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: brkn_all_active:
; CHECK: // %bb.0:
ret i32 %conv
}
+define i32 @brkn_neg2(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkn_neg2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: ptest p0, p2.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
define i1 @and(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: and:
; CHECK: // %bb.0:
-; CHECK-NEXT: and p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: ands p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @bic(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: bic:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: bics p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @eor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: eor:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: eors p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @nand(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: nand:
; CHECK: // %bb.0:
-; CHECK-NEXT: nand p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: nands p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @nor(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: nor:
; CHECK: // %bb.0:
-; CHECK-NEXT: nor p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: nors p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @orn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: orn:
; CHECK: // %bb.0:
-; CHECK-NEXT: orn p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: orns p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @orr(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: orr:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr p1.b, p0/z, p1.b, p2.b
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: orrs p0.b, p0/z, p1.b, p2.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
define i1 @ptrue() {
; CHECK-LABEL: ptrue:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, pow2
-; CHECK-NEXT: ptest p0, p0.b
+; CHECK-NEXT: ptrues p0.b, pow2
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0)
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p4/z, p0.b
-; CHECK-NEXT: ptest p4, p0.b
+; CHECK-NEXT: nots p0.b, p4/z, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: addvl sp, sp, #1
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
; CHECK-LABEL: reduce_and_insert_subvec_into_ones:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%allones.ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
; CHECK-LABEL: reduce_and_insert_subvec_into_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: not p0.b, p1/z, p0.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: nots p0.b, p1/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
-; CHECK-NEXT: not p0.b, p2/z, p0.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: nots p0.b, p2/z, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)