#define DEBUG_TYPE "arm-mve-vpt"
namespace {
- class MVEVPTBlock : public MachineFunctionPass {
- public:
- static char ID;
- const Thumb2InstrInfo *TII;
- const TargetRegisterInfo *TRI;
+class MVEVPTBlock : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
- MVEVPTBlock() : MachineFunctionPass(ID) {}
+ MVEVPTBlock() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
- StringRef getPassName() const override {
- return "MVE VPT block insertion pass";
- }
+ StringRef getPassName() const override {
+ return "MVE VPT block insertion pass";
+ }
- private:
- bool InsertVPTBlocks(MachineBasicBlock &MBB);
- };
+private:
+ bool InsertVPTBlocks(MachineBasicBlock &MBB);
+};
- char MVEVPTBlock::ID = 0;
+char MVEVPTBlock::ID = 0;
} // end anonymous namespace
return &*CmpMI;
}
+static ARM::PredBlockMask ExpandBlockMask(ARM::PredBlockMask BlockMask,
+ ARMVCC::VPTCodes Kind) {
+ using PredBlockMask = ARM::PredBlockMask;
+ assert(Kind != ARMVCC::None && "Cannot expand mask with 'None'");
+ assert(countTrailingZeros((unsigned)BlockMask) != 0 &&
+ "Mask is already full");
+
+ auto ChooseMask = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) {
+ return (Kind == ARMVCC::Then) ? AddedThen : AddedElse;
+ };
+
+ switch (BlockMask) {
+ case PredBlockMask::T:
+ return ChooseMask(PredBlockMask::TT, PredBlockMask::TE);
+ case PredBlockMask::TT:
+ return ChooseMask(PredBlockMask::TTT, PredBlockMask::TTE);
+ case PredBlockMask::TE:
+ return ChooseMask(PredBlockMask::TET, PredBlockMask::TEE);
+ case PredBlockMask::TTT:
+ return ChooseMask(PredBlockMask::TTTT, PredBlockMask::TTTE);
+ case PredBlockMask::TTE:
+ return ChooseMask(PredBlockMask::TTET, PredBlockMask::TTEE);
+ case PredBlockMask::TET:
+ return ChooseMask(PredBlockMask::TETT, PredBlockMask::TETE);
+ case PredBlockMask::TEE:
+ return ChooseMask(PredBlockMask::TEET, PredBlockMask::TEEE);
+ default:
+ llvm_unreachable("Unknown Mask");
+ }
+}
+
+// Advances Iter past a block of predicated instructions.
+// Returns true if it successfully skipped the whole block of predicated
+// instructions. Returns false when it stopped early (due to MaxSteps), or if
+// Iter didn't point to a predicated instruction.
+static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ unsigned MaxSteps,
+ unsigned &NumInstrsSteppedOver) {
+ ARMVCC::VPTCodes NextPred = ARMVCC::None;
+ unsigned PredReg;
+ NumInstrsSteppedOver = 0;
+
+ while (Iter != EndIter) {
+ NextPred = getVPTInstrPredicate(*Iter, PredReg);
+ assert(NextPred != ARMVCC::Else &&
+ "VPT block pass does not expect Else preds");
+ if (NextPred == ARMVCC::None || MaxSteps == 0)
+ break;
+ --MaxSteps;
+ ++Iter;
+ ++NumInstrsSteppedOver;
+ };
+
+ return NumInstrsSteppedOver != 0 &&
+ (NextPred == ARMVCC::None || Iter == EndIter);
+}
+
+// Returns true if at least one instruction in the range [Iter, End) defines
+// or kills VPR.
+static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
+ MachineBasicBlock::iterator End) {
+ for (; Iter != End; ++Iter)
+ if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
+ return true;
+ return false;
+}
+
+// Given an iterator (Iter) that points at an instruction with a "Then"
+// predicate, tries to create the largest block of continuous predicated
+// instructions possible, and returns the VPT Block Mask of that block.
+//
+// This will try to perform some minor optimization in order to maximize the
+// size of the block.
+static ARM::PredBlockMask
+CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ SmallVectorImpl<MachineInstr *> &DeadInstructions) {
+ MachineBasicBlock::instr_iterator BlockBeg = Iter;
+ assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
+ "Expected a Predicated Instruction");
+
+ LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
+
+ unsigned BlockSize;
+ StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
+
+ LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
+ std::next(BlockBeg);
+ AddedInstIter != Iter; ++AddedInstIter) {
+ dbgs() << " adding: ";
+ AddedInstIter->dump();
+ });
+
+ // Generate the initial BlockMask
+ ARM::PredBlockMask BlockMask = getARMVPTBlockMask(BlockSize);
+
+ // Remove VPNOTs while there's still room in the block, so we can make the
+ // largest block possible.
+ ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
+ while (BlockSize < 4 && Iter != EndIter &&
+ Iter->getOpcode() == ARM::MVE_VPNOT) {
+
+ // Try to skip all of the predicated instructions after the VPNOT, stopping
+ // after (4 - BlockSize). If we can't skip them all, stop.
+ unsigned ElseInstCnt = 0;
+ MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
+ if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
+ ElseInstCnt))
+ break;
+
+ // Check if this VPNOT can be removed or not: It can only be removed if at
+ // least one of the predicated instruction that follows it kills or sets
+ // VPR.
+ if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
+ break;
+
+ LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump(););
+
+ // Record the new size of the block
+ BlockSize += ElseInstCnt;
+ assert(BlockSize <= 4 && "Block is too large!");
+
+ // Record the VPNot to remove it later.
+ DeadInstructions.push_back(&*Iter);
+ ++Iter;
+
+ // Replace "then" by "elses" in the block until we find an instruction that
+ // defines VPR, then after that leave everything to "t".
+ // Note that we are using "Iter" to iterate over the block so we can update
+ // it at the same time.
+ bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
+ for (; Iter != VPNOTBlockEndIter; ++Iter) {
+ // Find the register in which the predicate is
+ int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+ assert(OpIdx != -1);
+
+ // Update the mask + change the predicate to an else if needed.
+ if (ChangeToElse) {
+ // Change the predicate and update the mask
+ Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
+ BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
+ // Reset back to a "then" predicate if this instruction defines VPR.
+ if (Iter->definesRegister(ARM::VPR))
+ ChangeToElse = false;
+ } else
+ BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
+
+ LLVM_DEBUG(dbgs() << " adding: "; Iter->dump());
+ }
+
+ CurrentPredicate =
+ (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
+ }
+ return BlockMask;
+}
+
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
  bool Modified = false;
  MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
  MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
  SmallVector<MachineInstr *, 4> DeadInstructions;

  while (MBIter != EndIter) {
    MachineInstr *MI = &*MBIter;
    unsigned PredReg = 0;
    DebugLoc DL = MI->getDebugLoc();
    ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
    // The idea of the predicate is that None, Then and Else are for use when
    // handling assembly language: they correspond to the three possible
    // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
    // from assembly source or disassembled from object code, you expect to
    // see a mixture whenever there's a long VPT block. But in code
    // generation, we hope we'll never generate an Else as input to this pass.
    assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
    if (Pred == ARMVCC::None) {
      // Advance past unpredicated instructions; without the increment this
      // loop would never terminate.
      ++MBIter;
      continue;
    }
    // Build the largest possible block starting at MBIter; on return MBIter
    // points one past the end of the new block.
    ARM::PredBlockMask BlockMask =
        CreateVPTBlock(MBIter, EndIter, DeadInstructions);
    // Search back for a VCMP that can be folded to create a VPT, or else
    // create a VPST directly
    MachineInstrBuilder MIBuilder;
    unsigned NewOpcode;
    LLVM_DEBUG(dbgs() << "  final block mask: " << (unsigned)BlockMask << "\n");
    if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
      LLVM_DEBUG(dbgs() << "  folding VCMP into VPST: "; VCMP->dump());
      MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
      MIBuilder.addImm((uint64_t)BlockMask);
      MIBuilder.add(VCMP->getOperand(1));
      MIBuilder.add(VCMP->getOperand(2));
      MIBuilder.add(VCMP->getOperand(3));
      VCMP->eraseFromParent();
    } else {
      MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
      MIBuilder.addImm((uint64_t)BlockMask);
    }
    // Bundle the new VPT/VPST with its predicated instructions.
    finalizeBundle(
        Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
    Modified = true;
  }

  // Erase all dead instructions (VPNOTs removed by CreateVPTBlock). They may
  // have been bundled above, so erase from the bundle when needed.
  for (MachineInstr *DeadMI : DeadInstructions) {
    if (DeadMI->isInsideBundle())
      DeadMI->eraseFromBundle();
    else
      DeadMI->eraseFromParent();
  }

  return Modified;
}
int findFirstVPTPredOperandIdx(const MachineInstr &MI);
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
unsigned &PredReg);
+inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) {
+ unsigned PredReg;
+ return getVPTInstrPredicate(MI, PredReg);
}
+} // namespace llvm
#endif
define arm_aapcs_vfpcc <4 x i32> @vpnot_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpnot_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpt.s32 lt, q0, zr
+; CHECK-NEXT: vpte.s32 lt, q0, zr
; CHECK-NEXT: vcmpt.s32 gt, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vcmpt.i32 eq, q2, zr
+; CHECK-NEXT: vcmpe.i32 eq, q2, zr
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %s
}
+
+declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
+
; Checks that the VPNOT between predicated ops folds into a single
; "vpttet" block (per the CHECK lines below).
define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: vpttet_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3, q2
; CHECK-NEXT:    vpttet.s32 ge, q0, q2
; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
; CHECK-NEXT:    vcmpe.s32 gt, q1, zr
; CHECK-NEXT:    vmovt q3, q2
; CHECK-NEXT:    vmov q0, q3
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %x, %z
  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
  %2 = icmp sgt <4 x i32> %x, zeroinitializer
  %3 = and <4 x i1> %0, %2
  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
  %5 = icmp sgt <4 x i32> %y, zeroinitializer
  %6 = and <4 x i1> %5, %4
  %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
  ret <4 x i32> %7
}
+
; Checks that two else-predicated ops after the VPNOT fold into a single
; "vpttee" block (per the CHECK lines below).
define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: vpttee_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3, q2
; CHECK-NEXT:    vpttee.s32 ge, q0, q2
; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
; CHECK-NEXT:    vmove q3, q2
; CHECK-NEXT:    vmove q3, q2
; CHECK-NEXT:    vmov q0, q3
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %x, %z
  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
  %2 = icmp sgt <4 x i32> %x, zeroinitializer
  %3 = and <4 x i1> %0, %2
  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
  ret <4 x i32> %6
}
+
; Checks that a trailing else-predicated op folds into a single "vpttte"
; block (per the CHECK lines below).
define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: vpttte_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3, q2
; CHECK-NEXT:    vpttte.s32 ge, q0, q2
; CHECK-NEXT:    vmaxt.s32 q3, q0, q1
; CHECK-NEXT:    vcmpt.s32 gt, q0, zr
; CHECK-NEXT:    vmovt q3, q2
; CHECK-NEXT:    vmove q3, q2
; CHECK-NEXT:    vmov q0, q3
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %x, %z
  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
  %2 = icmp sgt <4 x i32> %x, zeroinitializer
  %3 = and <4 x i1> %0, %2
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %1)
  %5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
  ret <4 x i32> %6
}
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vpt.s32 ge, q1, r2
+; CHECK-NEXT: vpte.s32 ge, q1, r2
; CHECK-NEXT: vcmpt.s32 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q0, [r0], #16
+; CHECK-NEXT: vstrwe.32 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q1, [r0]
-; CHECK-NEXT: vpt.s16 ge, q1, r2
+; CHECK-NEXT: vpte.s16 ge, q1, r2
; CHECK-NEXT: vcmpt.s16 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrht.16 q0, [r0], #16
+; CHECK-NEXT: vstrhe.16 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q1, [r0]
-; CHECK-NEXT: vpt.s8 ge, q1, r2
+; CHECK-NEXT: vpte.s8 ge, q1, r2
; CHECK-NEXT: vcmpt.s8 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrbt.8 q0, [r0], #16
+; CHECK-NEXT: vstrbe.8 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vpt.f32 ge, q1, r2
+; CHECK-NEXT: vpte.f32 ge, q1, r2
; CHECK-NEXT: vcmpt.f32 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q0, [r0], #16
+; CHECK-NEXT: vstrwe.32 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q1, [r0]
-; CHECK-NEXT: vpt.f16 ge, q1, r2
+; CHECK-NEXT: vpte.f16 ge, q1, r2
; CHECK-NEXT: vcmpt.f16 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrht.16 q0, [r0], #16
+; CHECK-NEXT: vstrhe.16 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vpt.s32 ge, q1, r2
+; CHECK-NEXT: vpte.s32 ge, q1, r2
; CHECK-NEXT: vcmpt.s32 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q0, [r0], #16
+; CHECK-NEXT: vstrwe.32 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB6_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q1, [r0]
-; CHECK-NEXT: vpt.s16 ge, q1, r2
+; CHECK-NEXT: vpte.s16 ge, q1, r2
; CHECK-NEXT: vcmpt.s16 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrht.16 q0, [r0], #16
+; CHECK-NEXT: vstrhe.16 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB7_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q1, [r0]
-; CHECK-NEXT: vpt.s8 ge, q1, r2
+; CHECK-NEXT: vpte.s8 ge, q1, r2
; CHECK-NEXT: vcmpt.s8 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrbt.8 q0, [r0], #16
+; CHECK-NEXT: vstrbe.8 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB7_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vpt.f32 ge, q1, r2
+; CHECK-NEXT: vpte.f32 ge, q1, r2
; CHECK-NEXT: vcmpt.f32 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q0, [r0], #16
+; CHECK-NEXT: vstrwe.32 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB8_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB9_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q1, [r0]
-; CHECK-NEXT: vpt.f16 ge, q1, r2
+; CHECK-NEXT: vpte.f16 ge, q1, r2
; CHECK-NEXT: vcmpt.f16 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrht.16 q0, [r0], #16
+; CHECK-NEXT: vstrhe.16 q0, [r0], #16
; CHECK-NEXT: le lr, .LBB9_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK: liveins: $q0, $q1, $q2, $r0
; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg
; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3
- ; CHECK: BUNDLE implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 {
- ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $vpr, implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 {
+ ; CHECK: MVE_VPST 12, implicit $vpr
; CHECK: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3
- ; CHECK: }
- ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit $vpr, implicit killed $q3, implicit undef $q1 {
- ; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, renamable $q3, 1, renamable $vpr, undef renamable $q1
+ ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, internal renamable $q3, 2, internal renamable $vpr, undef renamable $q1
; CHECK: }
; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3
; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit killed $vpr, implicit killed $q1, implicit killed $q2, implicit killed $q3, implicit killed $q0 {
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-none-eabi"
+
+ define hidden arm_aapcs_vfpcc <4 x float> @vpt_block_else(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
+ entry:
+ %conv.i = zext i16 %p to i32
+ %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2
+ %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2
+ %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2
+ %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2
+ ret <4 x float> %3
+ }
+
+ declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1
+
+ attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { nounwind readnone }
+ attributes #2 = { nounwind }
+
+...
+---
+name: vpt_block_else
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$q0', virtual-reg: '' }
+ - { reg: '$q1', virtual-reg: '' }
+ - { reg: '$q2', virtual-reg: '' }
+ - { reg: '$q3', virtual-reg: '' }
+ - { reg: '$r0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+constants: []
+body: |
+ bb.0.entry:
+ liveins: $q0, $q1, $q2
+
+ ; CHECK-LABEL: name: vpt_block_else
+ ; CHECK: liveins: $q0, $q1, $q2
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
+ ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr
+ ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
+ ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
+ ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
+ ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 {
+ ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 {
+ ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
+ ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 {
+ ; CHECK: MVE_VPTv4s32 6, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 {
+ ; CHECK: MVE_VPTv4s32 11, renamable $q0, renamable $q2, 10, implicit-def $vpr
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3
+ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
+ ; CHECK: }
+ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3
+ $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3
+ $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
+ renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+ $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+ tBX_RET 14, $noreg, implicit $q0
+
+...
; CHECK-LABEL: name: vpnot
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1 {
- ; CHECK: MVE_VPTv4s32r 8, renamable $q0, $zr, 11, implicit-def $vpr
+ ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1, implicit killed $q2 {
+ ; CHECK: MVE_VPTv4s32r 12, renamable $q0, $zr, 11, implicit-def $vpr
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, internal killed renamable $vpr
- ; CHECK: }
- ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr
+ ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr
; CHECK: }
; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
; CHECK: liveins: $q0, $q1, $q2
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q0, $zr, 11, 0, $noreg
; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr, implicit killed $q2 {
+ ; CHECK: MVE_VPST 12, implicit $vpr
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
- ; CHECK: }
- ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr
+ ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr
; CHECK: }
; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr