From 2effe8f5e72ac94286445c0fdfd2cad6999f4cb4 Mon Sep 17 00:00:00 2001 From: Pierre-vh Date: Mon, 9 Mar 2020 16:40:33 +0000 Subject: [PATCH] [Target][ARM] Improvements to the VPT Block Insertion Pass This allows the MVE VPT Block insertion pass to remove VPNOTs in order to create more complex VPT blocks such as TE, TEET, TETE, etc. Differential Revision: https://reviews.llvm.org/D75993 --- llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 249 +++++++++++++++++---- llvm/lib/Target/ARM/Thumb2InstrInfo.h | 4 + llvm/test/CodeGen/Thumb2/mve-pred-not.ll | 76 ++++++- llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll | 60 ++--- .../CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir | 10 +- llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir | 231 +++++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir | 20 +- 7 files changed, 536 insertions(+), 114 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index c8b725f..30964c3 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -34,30 +34,30 @@ using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt" namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; - const Thumb2InstrInfo *TII; - const TargetRegisterInfo *TRI; +class MVEVPTBlock : public MachineFunctionPass { +public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; - MVEVPTBlock() : MachineFunctionPass(ID) {} + MVEVPTBlock() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &Fn) override; - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); - }; +private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); +}; - char MVEVPTBlock::ID = 0; +char MVEVPTBlock::ID = 0; } // end anonymous namespace @@ -94,24 +94,183 @@ static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, return &*CmpMI; } +static ARM::PredBlockMask ExpandBlockMask(ARM::PredBlockMask BlockMask, + ARMVCC::VPTCodes Kind) { + using PredBlockMask = ARM::PredBlockMask; + assert(Kind != ARMVCC::None && "Cannot expand mask with 'None'"); + assert(countTrailingZeros((unsigned)BlockMask) != 0 && + "Mask is already full"); + + auto ChooseMask = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) { + return (Kind == ARMVCC::Then) ? 
AddedThen : AddedElse;
+  };
+
+  switch (BlockMask) {
+  case PredBlockMask::T:
+    return ChooseMask(PredBlockMask::TT, PredBlockMask::TE);
+  case PredBlockMask::TT:
+    return ChooseMask(PredBlockMask::TTT, PredBlockMask::TTE);
+  case PredBlockMask::TE:
+    return ChooseMask(PredBlockMask::TET, PredBlockMask::TEE);
+  case PredBlockMask::TTT:
+    return ChooseMask(PredBlockMask::TTTT, PredBlockMask::TTTE);
+  case PredBlockMask::TTE:
+    return ChooseMask(PredBlockMask::TTET, PredBlockMask::TTEE);
+  case PredBlockMask::TET:
+    return ChooseMask(PredBlockMask::TETT, PredBlockMask::TETE);
+  case PredBlockMask::TEE:
+    return ChooseMask(PredBlockMask::TEET, PredBlockMask::TEEE);
+  default:
+    llvm_unreachable("Unknown Mask");
+  }
+}
+
+// Advances Iter past a block of predicated instructions.
+// Returns true if it successfully skipped the whole block of predicated
+// instructions. Returns false when it stopped early (due to MaxSteps), or if
+// Iter didn't point to a predicated instruction.
+static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
+                                     MachineBasicBlock::instr_iterator EndIter,
+                                     unsigned MaxSteps,
+                                     unsigned &NumInstrsSteppedOver) {
+  ARMVCC::VPTCodes NextPred = ARMVCC::None;
+  unsigned PredReg;
+  NumInstrsSteppedOver = 0;
+
+  while (Iter != EndIter) {
+    NextPred = getVPTInstrPredicate(*Iter, PredReg);
+    assert(NextPred != ARMVCC::Else &&
+           "VPT block pass does not expect Else preds");
+    if (NextPred == ARMVCC::None || MaxSteps == 0)
+      break;
+    --MaxSteps;
+    ++Iter;
+    ++NumInstrsSteppedOver;
+  };
+
+  return NumInstrsSteppedOver != 0 &&
+         (NextPred == ARMVCC::None || Iter == EndIter);
+}
+
+// Returns true if at least one instruction in the range [Iter, End) defines
+// or kills VPR.
+static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
+                                        MachineBasicBlock::iterator End) {
+  for (; Iter != End; ++Iter)
+    if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
+      return true;
+  return false;
+}
+
+// Given an iterator (Iter) that points at an instruction with a "Then"
+// predicate, tries to create the largest block of contiguous predicated
+// instructions possible, and returns the VPT Block Mask of that block.
+//
+// This will try to perform some minor optimizations in order to maximize the
+// size of the block.
+static ARM::PredBlockMask
+CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
+               MachineBasicBlock::instr_iterator EndIter,
+               SmallVectorImpl<MachineInstr *> &DeadInstructions) {
+  MachineBasicBlock::instr_iterator BlockBeg = Iter;
+  assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
+         "Expected a Predicated Instruction");
+
+  LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
+
+  unsigned BlockSize;
+  StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
+
+  LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
+                      std::next(BlockBeg);
+                  AddedInstIter != Iter; ++AddedInstIter) {
+    dbgs() << "  adding: ";
+    AddedInstIter->dump();
+  });
+
+  // Generate the initial BlockMask.
+  ARM::PredBlockMask BlockMask = getARMVPTBlockMask(BlockSize);
+
+  // Remove VPNOTs while there's still room in the block, so we can make the
+  // largest block possible.
+  ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
+  while (BlockSize < 4 && Iter != EndIter &&
+         Iter->getOpcode() == ARM::MVE_VPNOT) {
+
+    // Try to skip all of the predicated instructions after the VPNOT, stopping
+    // after (4 - BlockSize). If we can't skip them all, stop.
+    unsigned ElseInstCnt = 0;
+    MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
+    if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
+                                  ElseInstCnt))
+      break;
+
+    // Check if this VPNOT can be removed or not: it can only be removed if at
+    // least one of the predicated instructions that follow it kills or sets
+    // VPR.
+    if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
+      break;
+
+    LLVM_DEBUG(dbgs() << "  removing VPNOT: "; Iter->dump(););
+
+    // Record the new size of the block.
+    BlockSize += ElseInstCnt;
+    assert(BlockSize <= 4 && "Block is too large!");
+
+    // Record the VPNOT so we can remove it later.
+    DeadInstructions.push_back(&*Iter);
+    ++Iter;
+
+    // Replace the "then" predicates with "else" predicates in the block until
+    // we find an instruction that defines VPR; everything after that stays
+    // predicated on "then".
+    // Note that we are using "Iter" to iterate over the block so we can update
+    // it at the same time.
+    bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
+    for (; Iter != VPNOTBlockEndIter; ++Iter) {
+      // Find the position of the predicate operand.
+      int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+      assert(OpIdx != -1);
+
+      // Update the mask and change the predicate to an "else" if needed.
+      if (ChangeToElse) {
+        // Change the predicate and update the mask.
+        Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
+        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
+        // Reset back to a "then" predicate if this instruction defines VPR.
+        if (Iter->definesRegister(ARM::VPR))
+          ChangeToElse = false;
+      } else
+        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
+
+      LLVM_DEBUG(dbgs() << "  adding: "; Iter->dump());
+    }
+
+    CurrentPredicate =
+        (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
+  }
+  return BlockMask;
+}
+
 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
   bool Modified = false;
   MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
   MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
 
+  SmallVector<MachineInstr *, 4> DeadInstructions;
+
   while (MBIter != EndIter) {
     MachineInstr *MI = &*MBIter;
     unsigned PredReg = 0;
-    DebugLoc dl = MI->getDebugLoc();
+    DebugLoc DL = MI->getDebugLoc();
     ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
 
     // The idea of the predicate is that None, Then and Else are for use when
     // handling assembly language: they correspond to the three possible
     // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
-    // from assembly source or disassembled from object code, you expect to see
-    // a mixture whenever there's a long VPT block. But in code generation, we
-    // hope we'll never generate an Else as input to this pass.
+    // from assembly source or disassembled from object code, you expect to
+    // see a mixture whenever there's a long VPT block. But in code
+    // generation, we hope we'll never generate an Else as input to this pass.
     assert(Pred != ARMVCC::Else &&
            "VPT block pass does not expect Else preds");
 
     if (Pred == ARMVCC::None) {
@@ -119,42 +278,25 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
       continue;
     }
 
-    LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
-    int VPTInstCnt = 1;
-    ARMVCC::VPTCodes NextPred;
-
-    // Look at subsequent instructions, checking if they can be in the same VPT
-    // block.
- ++MBIter; - while (MBIter != EndIter && VPTInstCnt < 4) { - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - assert(NextPred != ARMVCC::Else && - "VPT block pass does not expect Else preds"); - if (NextPred != Pred) - break; - LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); - ++VPTInstCnt; - ++MBIter; - }; - - unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt); + ARM::PredBlockMask BlockMask = + CreateVPTBlock(MBIter, EndIter, DeadInstructions); - // Search back for a VCMP that can be folded to create a VPT, or else create - // a VPST directly + // Search back for a VCMP that can be folded to create a VPT, or else + // create a VPST directly MachineInstrBuilder MIBuilder; unsigned NewOpcode; - MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); - if (VCMP) { + LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); + if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); - MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); + MIBuilder.addImm((uint64_t)BlockMask); MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); VCMP->eraseFromParent(); } else { - MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm((uint64_t)BlockMask); } finalizeBundle( @@ -162,6 +304,15 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { Modified = true; } + + // Erase all dead instructions + for (MachineInstr *DeadMI : DeadInstructions) { + if (DeadMI->isInsideBundle()) + DeadMI->eraseFromBundle(); + else + DeadMI->eraseFromParent(); + } + return Modified; } diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 3e8e77a..583a091 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -74,6 +74,10 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg); int findFirstVPTPredOperandIdx(const MachineInstr &MI); ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, unsigned &PredReg); +inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) { + unsigned PredReg; + return getVPTInstrPredicate(MI, PredReg); } +} // namespace llvm #endif diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll index bc94f8b..70fc0e4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll @@ -383,11 +383,9 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vpnot_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: vpnot_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpt.s32 lt, q0, zr +; CHECK-NEXT: vpte.s32 lt, q0, zr ; CHECK-NEXT: vcmpt.s32 gt, q1, zr -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vcmpt.i32 eq, q2, zr +; CHECK-NEXT: vcmpe.i32 eq, q2, zr ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: @@ -400,3 +398,73 @@ entry: %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b ret <4 x i32> %s } + +declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; 
CHECK-LABEL: vpttet_v4i1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q3, q2
+; CHECK-NEXT: vpttet.s32 ge, q0, q2
+; CHECK-NEXT: vmaxt.s32 q3, q0, q1
+; CHECK-NEXT: vcmpt.s32 gt, q0, zr
+; CHECK-NEXT: vcmpe.s32 gt, q1, zr
+; CHECK-NEXT: vmovt q3, q2
+; CHECK-NEXT: vmov q0, q3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %5 = icmp sgt <4 x i32> %y, zeroinitializer
+  %6 = and <4 x i1> %5, %4
+  %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
+  ret <4 x i32> %7
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttee_v4i1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q3, q2
+; CHECK-NEXT: vpttee.s32 ge, q0, q2
+; CHECK-NEXT: vmaxt.s32 q3, q0, q1
+; CHECK-NEXT: vcmpt.s32 gt, q0, zr
+; CHECK-NEXT: vmove q3, q2
+; CHECK-NEXT: vmove q3, q2
+; CHECK-NEXT: vmov q0, q3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
+  ret <4 x i32> %6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: vpttte_v4i1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q3, q2
+; CHECK-NEXT: vpttte.s32 ge, q0, q2
+; CHECK-NEXT: vmaxt.s32 q3, q0, q1
+; CHECK-NEXT: vcmpt.s32 gt, q0, zr
+; CHECK-NEXT: vmovt q3, q2
+; CHECK-NEXT: vmove q3, q2
+; CHECK-NEXT: vmov q0, q3
+; CHECK-NEXT: bx lr
+entry:
+  %0 = icmp sge <4 x i32> %x, %z
+  %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z)
+  %2 = icmp sgt <4 x i32> %x, zeroinitializer
+  %3 = and <4 x i1> %0, %2
+  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %1)
+  %5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
+  %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
+  ret <4 x i32> %6
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
index ce82ba0..df211f1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll
@@ -19,11 +19,9 @@ define arm_aapcs_vfpcc void @thres_i32(i32* %data, i16 zeroext %N, i32 %T) {
 ; CHECK-NEXT: .LBB0_1: @ %vector.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vpt.s32 ge, q1, r2
+; CHECK-NEXT: vpte.s32 ge, q1, r2
 ; CHECK-NEXT: vcmpt.s32 le, q1, r1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q0, [r0], #16
+; CHECK-NEXT: vstrwe.32 q0, [r0], #16
 ; CHECK-NEXT: le lr, .LBB0_1
 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
 ; CHECK-NEXT: pop {r7, pc}
@@ -77,11 +75,9 @@ define arm_aapcs_vfpcc void @thresh_i16(i16* %data, i16 zeroext %N, i16 signext
 ; CHECK-NEXT: .LBB1_1: @ %vector.body
 ;
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.s16 ge, q1, r2 +; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB1_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -135,11 +131,9 @@ define arm_aapcs_vfpcc void @thresh_i8(i8* %data, i16 zeroext %N, i8 signext %T) ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -195,11 +189,9 @@ define arm_aapcs_vfpcc void @thresh_f32(float* %data, i16 zeroext %N, float %T) ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.f32 ge, q1, r2 +; CHECK-NEXT: vpte.f32 ge, q1, r2 ; CHECK-NEXT: vcmpt.f32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -255,11 +247,9 @@ define arm_aapcs_vfpcc void @thresh_f16(half* %data, i16 zeroext %N, float %T.co ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.f16 ge, q1, r2 +; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -318,11 +308,9 @@ define arm_aapcs_vfpcc void @thres_rev_i32(i32* %data, i16 zeroext %N, i32 %T) { ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.s32 ge, q1, r2 +; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB5_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -376,11 +364,9 @@ define arm_aapcs_vfpcc void @thresh_rev_i16(i16* %data, i16 zeroext %N, i16 sign ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.s16 ge, q1, r2 +; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -434,11 +420,9 @@ define arm_aapcs_vfpcc void @thresh_rev_i8(i8* %data, i16 zeroext %N, i8 signext ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, 
[r0], #16 ; CHECK-NEXT: le lr, .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -494,11 +478,9 @@ define arm_aapcs_vfpcc void @thresh_rev_f32(float* %data, i16 zeroext %N, float ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.f32 ge, q1, r2 +; CHECK-NEXT: vpte.f32 ge, q1, r2 ; CHECK-NEXT: vcmpt.f32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -554,11 +536,9 @@ define arm_aapcs_vfpcc void @thresh_rev_f16(half* %data, i16 zeroext %N, float % ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.f16 ge, q1, r2 +; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB9_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir index 347c487..7401d77 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir @@ -68,14 +68,10 @@ body: | ; CHECK: liveins: $q0, $q1, $q2, $r0 ; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 - ; CHECK: BUNDLE implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { - ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $vpr, implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPST 12, implicit $vpr ; CHECK: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit $vpr, implicit killed $q3, implicit undef $q1 { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, renamable $q3, 1, renamable $vpr, undef renamable $q1 + ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, internal renamable $q3, 2, internal renamable $vpr, undef renamable $q1 ; CHECK: } ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit killed $vpr, implicit killed $q1, implicit killed $q2, implicit killed $q3, implicit killed $q0 { diff --git 
a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir new file mode 100644 index 0000000..765d3a4 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir @@ -0,0 +1,231 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @vpt_block_else(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %3 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... 
+--- +name: vpt_block_else +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: vpt_block_else + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; 
CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR 
renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 6, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 11, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, 
renamable $vpr, renamable $q3 + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable 
$vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + tBX_RET 14, $noreg, implicit $q0 + +... diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir index b3e9539..8bc7a0b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir @@ -61,14 +61,10 @@ body: | ; CHECK-LABEL: name: vpnot ; CHECK: liveins: $q0, $q1, $q2 - ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1 { - ; CHECK: MVE_VPTv4s32r 8, renamable $q0, $zr, 11, implicit-def $vpr + ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1, implicit killed $q2 { + ; CHECK: MVE_VPTv4s32r 12, renamable $q0, $zr, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, internal killed renamable $vpr - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr ; CHECK: } ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 @@ -244,14 +240,10 @@ body: | ; CHECK: liveins: $q0, $q1, $q2 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q0, $zr, 11, 0, $noreg ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr, implicit killed $q2 { + ; CHECK: MVE_VPST 12, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr ; CHECK: } ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr -- 2.7.4
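
Note on the block-mask expansion used by this patch: a VPT block holds at most four predicated instructions, and its mask is just the sequence of "then"/"else" lanes (T, TE, TET, ...). The standalone sketch below illustrates that rule with a simplified string-based mask; the enum, helper, and file name are illustrative stand-ins and deliberately do not reuse the LLVM ARM::PredBlockMask or ARMVCC types, so treat it as a minimal model of ExpandBlockMask rather than the pass itself.

// mask_expand_sketch.cpp - illustrative only, not part of the patch.
#include <cassert>
#include <iostream>
#include <string>

enum class Pred { Then, Else };

// Append one predicated instruction to a textual block mask ("T", "TE",
// "TET", ...), mirroring the switch in ExpandBlockMask above.
std::string expandMask(std::string Mask, Pred Kind) {
  assert(!Mask.empty() && Mask.size() < 4 &&
         "a VPT block holds one to four predicated instructions");
  Mask.push_back(Kind == Pred::Then ? 'T' : 'E');
  return Mask;
}

int main() {
  // A block that starts with one "then" instruction, absorbs the instruction
  // that used to follow a VPNOT as an "else", then adds a trailing "then":
  // T -> TE -> TET, i.e. the kind of mixed then/else block (vpte, vptet, ...)
  // the pass now emits instead of separate vpnot/vpst sequences.
  std::string Mask = "T";
  Mask = expandMask(Mask, Pred::Else);
  Mask = expandMask(Mask, Pred::Then);
  std::cout << "block mask: " << Mask << '\n'; // prints "block mask: TET"
  return 0;
}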