From: Amy Kwan
Date: Wed, 28 Apr 2021 03:37:02 +0000 (-0500)
Subject: [PowerPC] Add new infrastructure to select load/store instructions, update P8/P9...
X-Git-Tag: llvmorg-14-init~7964
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=64d951be61aa7d69ad10cc87796151156da7d7ce;p=platform%2Fupstream%2Fllvm.git

[PowerPC] Add new infrastructure to select load/store instructions, update P8/P9 load/store patterns.

This patch introduces a new infrastructure that is used to select the load and
store instructions in the PPC backend. The primary motivation is that the
current implementation of selecting loads/stores depends on the ordering of
patterns in TableGen. Given this limitation, we are not able to easily and
reliably generate the P10 prefixed load and store instructions (such as when
the immediate fits within 34 bits). This refactoring is meant to provide us
with more control over which patterns/instruction forms to exploit, as well as
to eliminate the dependency on pattern declaration order in TableGen.

The idea of this refactoring is that it introduces a set of addressing modes
that correspond to the different instruction formats of a particular load or
store instruction, along with a set of common flags that describe a
load/store. Whenever a load/store instruction is being selected, we analyze
the instruction and compute a set of flags for it. The computed flags are then
used to select the optimal load/store addressing mode.

This patch is the first of a series of patches to be committed - it contains
the initial implementation of the refactored load/store selection
infrastructure and also updates P8/P9 patterns to adopt this infrastructure.
The idea is that incremental patches will add more implementation and support,
and eventually the old implementation will be removed.

Differential Revision: https://reviews.llvm.org/D93370
---

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 568c9d6..65ebafd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -229,6 +229,45 @@ namespace {
     return false;
   }
 
+  /// SelectDSForm - Returns true if address N can be represented by the
+  /// addressing mode of DSForm instructions (a base register, plus a signed
+  /// 16-bit displacement that is a multiple of 4).
+  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                              Align(4)) == PPC::AM_DSForm;
+  }
+
+  /// SelectDQForm - Returns true if address N can be represented by the
+  /// addressing mode of DQForm instructions (a base register, plus a signed
+  /// 16-bit displacement that is a multiple of 16).
+  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                              Align(16)) == PPC::AM_DQForm;
+  }
+
+  /// SelectDForm - Returns true if address N can be represented by
+  /// the addressing mode of DForm instructions (a base register, plus a
+  /// signed 16-bit immediate).
+  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                              None) == PPC::AM_DForm;
+  }
+
+  /// SelectXForm - Returns true if address N can be represented by the
+  /// addressing mode of XForm instructions (an indexed [r+r] operation).
+  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                              None) == PPC::AM_XForm;
+  }
+
+  /// SelectForceXForm - Given the specified address, force it to be
+  /// represented as an indexed [r+r] operation (an XForm instruction).
+  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
+                        SDValue &Base) {
+    return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
+           PPC::AM_XForm;
+  }
+
   /// SelectAddrIdx - Given the specified address, check to see if it can be
   /// represented as an indexed [r+r] operation.
   /// This is for xform instructions whose associated displacement form is D.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1547fc3..11ee8d7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -136,6 +136,10 @@ extern cl::opt<bool> ANDIGlueBug;
 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                      const PPCSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
+  // Initialize the map that relates the PPC addressing modes to the computed
+  // flags of a load/store instruction. The map is used to determine the
+  // optimal addressing mode when selecting loads and stores.
+  initializeAddrModeMap();
   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
   // arguments are at least 4/8 bytes aligned.
   bool isPPC64 = Subtarget.isPPC64();
@@ -1424,6 +1428,84 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
 }
 
+// *********************************** NOTE ************************************
+// For selecting load and store instructions, the addressing modes are defined
+// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
+// patterns to match the load and store instructions.
+//
+// The TD definitions for the addressing modes correspond to their respective
+// Select<Form>() function in PPCISelDAGToDAG.cpp. These functions rely
+// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
+// address mode flags of a particular node. Afterwards, the computed address
+// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
+// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
+// accordingly, based on the preferred addressing mode.
+//
+// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
+// MemOpFlags contains all the possible flags that can be used to compute the
+// optimal addressing mode for load and store instructions.
+// AddrMode contains all the possible load and store addressing modes available
+// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
+//
+// When adding new load and store instructions, it is possible that new address
+// flags may need to be added into MemOpFlags, and a new addressing mode will
+// need to be added to AddrMode. An entry of the new addressing mode (consisting
+// of the minimal and main distinguishing address flags for the new load/store
+// instructions) will need to be added into initializeAddrModeMap() below.
+// Finally, when adding new addressing modes, getAddrModeForFlags() will
+// need to be updated to account for selecting the optimal addressing mode.
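+//
+// For example (a rough sketch of the flow described above): selecting a plain
+// non-extending 8-byte integer load whose address is (add $reg, 16) computes
+// roughly
+//   MOF_DoubleWordInt | MOF_RPlusSImm16 | MOF_RPlusSImm16Mult4 |
+//   MOF_RPlusSImm16Mult16 | ... (plus extension and subtarget flags)
+// in computeMOFlags(). getAddrModeForFlags() then finds that these flags cover
+// the AM_DSForm entry (MOF_RPlusSImm16Mult4 | MOF_DoubleWordInt) in
+// AddrModesMap, so SelectOptimalAddrMode() returns PPC::AM_DSForm and sets
+// Base = $reg and Disp = 16, allowing a DSForm instruction such as LD to match.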
+// ***************************************************************************** +/// Initialize the map that relates the different addressing modes of the load +/// and store instructions to a set of flags. This ensures the load/store +/// instruction is correctly matched during instruction selection. +void PPCTargetLowering::initializeAddrModeMap() { + AddrModesMap[PPC::AM_DForm] = { + // LWZ, STW + PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, + PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, + // LBZ, LHZ, STB, STH + PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, + PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, + PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, + PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, + // LHA + PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, + PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, + PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, + PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, + // LFS, LFD, STFS, STFD + PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, + PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, + PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, + PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, + }; + AddrModesMap[PPC::AM_DSForm] = { + // LWA + PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt, + PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, + PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, + // LD, STD + PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt, + PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt, + PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt, + // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64 + PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, + PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, + PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, + }; + AddrModesMap[PPC::AM_DQForm] = { + // LXV, STXV + PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, + PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9, + PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, + PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, + PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, + PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10, + }; +} + /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { @@ -2436,6 +2518,20 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } +/// Used when computing address flags for selecting loads and stores. +/// If we have an OR, check if the LHS and RHS are provably disjoint. +/// An OR of two provably disjoint values is equivalent to an ADD. +/// Most PPC load/store instructions compute the effective address as a sum, +/// so doing this conversion is useful. 
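+/// For example, in (or (shl %x, 4), 7) the low four bits of the left operand
+/// are known to be zero and the constant 7 has zeros in all other bit
+/// positions, so the OR can safely be treated as (add (shl %x, 4), 7) when
+/// forming an address.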
+static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
+  if (N.getOpcode() != ISD::OR)
+    return false;
+  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
+  if (!LHSKnown.Zero.getBoolValue())
+    return false;
+  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
+  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
+}
 
 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
 /// be represented as an indexed [r+r] operation.
@@ -16836,3 +16932,343 @@ SDValue PPCTargetLowering::combineVSelect(SDNode *N,
 
   return SDValue();
 }
+
+/// getAddrModeForFlags - Based on the set of address flags, select the
+/// optimal instruction format to match by.
+PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
+  // This is not a node we should be handling here.
+  if (Flags == PPC::MOF_None)
+    return PPC::AM_None;
+  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DForm;
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DSForm;
+  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
+    if ((Flags & FlagSet) == FlagSet)
+      return PPC::AM_DQForm;
+  // If no other forms are selected, return an X-Form as it is the most
+  // general addressing mode.
+  return PPC::AM_XForm;
+}
+
+/// Set alignment flags based on whether or not the Frame Index is aligned.
+/// Utilized when computing flags for address computation when selecting
+/// load and store instructions.
+static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
+                               SelectionDAG &DAG) {
+  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
+  FrameIndexSDNode *FI =
+      dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
+  if (!FI)
+    return;
+  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
+  // If this is (add $FI, $S16Imm), the alignment flags are already set
+  // based on the immediate. We just need to clear the alignment flags
+  // if the FI alignment is weaker.
+  if ((FrameIndexAlign % 4) != 0)
+    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
+  if ((FrameIndexAlign % 16) != 0)
+    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
+  // If the address is a plain FrameIndex, set alignment flags based on
+  // FI alignment.
+  if (!IsAdd) {
+    if ((FrameIndexAlign % 4) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
+    if ((FrameIndexAlign % 16) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
+  }
+}
+
+/// Given a node, compute flags that are used for address computation when
+/// selecting load and store instructions. The flags computed are stored in
+/// FlagSet. This function takes into account whether the node is a constant,
+/// an ADD, an OR, or something else, and computes the address flags
+/// accordingly.
+static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
+                                              SelectionDAG &DAG) {
+  // Set the alignment flags for the node depending on if the node is
+  // 4-byte or 16-byte aligned.
+  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
+    if ((Imm & 0x3) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
+    if ((Imm & 0xf) == 0)
+      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
+  };
+
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+    // All 32-bit constants can be computed as LIS + Disp.
+    const APInt &ConstImm = CN->getAPIntValue();
+    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
+      FlagSet |= PPC::MOF_AddrIsSImm32;
+      SetAlignFlagsForImm(ConstImm.getZExtValue());
+      setAlignFlagsForFI(N, FlagSet, DAG);
+    }
+    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
+      FlagSet |= PPC::MOF_RPlusSImm34;
+    else // Let constant materialization handle large constants.
+      FlagSet |= PPC::MOF_NotAddNorCst;
+  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
+    // This address can be represented as an addition of:
+    // - Register + Imm16 (possibly a multiple of 4/16)
+    // - Register + Imm34
+    // - Register + PPCISD::Lo
+    // - Register + Register
+    // In any case, we won't have to match this as Base + Zero.
+    SDValue RHS = N.getOperand(1);
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
+      const APInt &ConstImm = CN->getAPIntValue();
+      if (ConstImm.isSignedIntN(16)) {
+        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
+        SetAlignFlagsForImm(ConstImm.getZExtValue());
+        setAlignFlagsForFI(N, FlagSet, DAG);
+      }
+      if (ConstImm.isSignedIntN(34))
+        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
+      else
+        FlagSet |= PPC::MOF_RPlusR; // Register.
+    } else if (RHS.getOpcode() == PPCISD::Lo &&
+               !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
+      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
+    else
+      FlagSet |= PPC::MOF_RPlusR;
+  } else { // The address computation is not a constant or an addition.
+    setAlignFlagsForFI(N, FlagSet, DAG);
+    FlagSet |= PPC::MOF_NotAddNorCst;
+  }
+}
+
+/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
+/// the address flags of the load/store instruction that is to be matched.
+unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
+                                           SelectionDAG &DAG) const {
+  unsigned FlagSet = PPC::MOF_None;
+
+  // Compute subtarget flags.
+  if (!Subtarget.hasP9Vector())
+    FlagSet |= PPC::MOF_SubtargetBeforeP9;
+  else {
+    FlagSet |= PPC::MOF_SubtargetP9;
+    if (Subtarget.hasPrefixInstrs())
+      FlagSet |= PPC::MOF_SubtargetP10;
+  }
+  if (Subtarget.hasSPE())
+    FlagSet |= PPC::MOF_SubtargetSPE;
+
+  // Mark this as something we don't want to handle here if it is an atomic
+  // or pre-increment instruction.
+  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
+    if (LSB->isIndexed())
+      return PPC::MOF_None;
+  if (isa<AtomicSDNode>(Parent))
+    return PPC::MOF_None;
+
+  // Compute in-memory type flags. This is based on whether the in-memory type
+  // is a scalar integer, float or vector.
+  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
+  assert(MN && "Parent should be a MemSDNode!");
+  EVT MemVT = MN->getMemoryVT();
+  unsigned Size = MemVT.getSizeInBits();
+  if (MemVT.isScalarInteger()) {
+    assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
+    if (Size < 32)
+      FlagSet |= PPC::MOF_SubWordInt;
+    else if (Size == 32)
+      FlagSet |= PPC::MOF_WordInt;
+    else
+      FlagSet |= PPC::MOF_DoubleWordInt;
+  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
+    if (Size == 128)
+      FlagSet |= PPC::MOF_Vector;
+    else if (Size == 256)
+      FlagSet |= PPC::MOF_Vector256;
+    else
+      llvm_unreachable("Not expecting illegal vectors!");
+  } else { // Floating point type: can be scalar, f128 or vector types.
+    if (Size == 32 || Size == 64)
+      FlagSet |= PPC::MOF_ScalarFloat;
+    else if (MemVT == MVT::f128 || MemVT.isVector())
+      FlagSet |= PPC::MOF_Vector;
+    else
+      llvm_unreachable("Not expecting illegal scalar floats!");
+  }
+
+  // Compute flags for address computation.
+  computeFlagsForAddressComputation(N, FlagSet, DAG);
+
+  // Compute type extension flags.
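+  // For example, an i32 sextload contributes MOF_SExt here, while a plain
+  // (non-extending) integer load is normalized to MOF_ZExt below so that
+  // loads and stores can share the same AddrModesMap entries.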
+  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
+    switch (LN->getExtensionType()) {
+    case ISD::SEXTLOAD:
+      FlagSet |= PPC::MOF_SExt;
+      break;
+    case ISD::EXTLOAD:
+    case ISD::ZEXTLOAD:
+      FlagSet |= PPC::MOF_ZExt;
+      break;
+    case ISD::NON_EXTLOAD:
+      FlagSet |= PPC::MOF_NoExt;
+      break;
+    }
+  } else
+    FlagSet |= PPC::MOF_NoExt;
+
+  // For integers, no extension is the same as zero extension.
+  // We set the extension mode to zero extension so we don't have
+  // to add separate entries in AddrModesMap for loads and stores.
+  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
+    FlagSet |= PPC::MOF_ZExt;
+    FlagSet &= ~PPC::MOF_NoExt;
+  }
+
+  // If we don't have prefixed instructions, 34-bit constants should be
+  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
+  bool IsNonP1034BitConst =
+      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
+       FlagSet) == PPC::MOF_RPlusSImm34;
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
+      IsNonP1034BitConst)
+    FlagSet |= PPC::MOF_NotAddNorCst;
+
+  return FlagSet;
+}
+
+/// SelectForceXFormMode - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation (an XForm instruction).
+PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
+                                                      SDValue &Base,
+                                                      SelectionDAG &DAG) const {
+
+  PPC::AddrMode Mode = PPC::AM_XForm;
+  int16_t ForceXFormImm = 0;
+  if (provablyDisjointOr(DAG, N) &&
+      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
+    Disp = N.getOperand(0);
+    Base = N.getOperand(1);
+    return Mode;
+  }
+
+  // If the address is the result of an add, we will utilize the fact that the
+  // address calculation includes an implicit add. However, we can reduce
+  // register pressure if we do not materialize a constant just for use as the
+  // index register. We only get rid of the add if it is not an add of a
+  // value and a 16-bit signed constant where both operands have a single use.
+  if (N.getOpcode() == ISD::ADD &&
+      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
+       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
+    Disp = N.getOperand(0);
+    Base = N.getOperand(1);
+    return Mode;
+  }
+
+  // Otherwise, use R0 as the base register.
+  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+                         N.getValueType());
+  Base = N;
+
+  return Mode;
+}
+
+/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
+/// compute the address flags of the node, get the optimal address mode based
+/// on the flags, and set the Base and Disp based on the address mode.
+PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
+                                                       SDValue N, SDValue &Disp,
+                                                       SDValue &Base,
+                                                       SelectionDAG &DAG,
+                                                       MaybeAlign Align) const {
+  SDLoc DL(Parent);
+
+  // Compute the address flags.
+  unsigned Flags = computeMOFlags(Parent, N, DAG);
+
+  // Get the optimal address mode based on the Flags.
+  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
+
+  // Set Base and Disp accordingly depending on the address mode.
+  switch (Mode) {
+  case PPC::AM_DForm:
+  case PPC::AM_DSForm:
+  case PPC::AM_DQForm: {
+    // This is a register plus a 16-bit immediate. The base will be the
+    // register and the displacement will be the immediate, provided it
+    // is sufficiently aligned.
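+    // For example, a DSForm access (Align(4)) to (add $reg, 12) passes the
+    // alignment check below and yields Base = $reg and Disp = 12, whereas an
+    // insufficiently aligned displacement falls through to the unfolded
+    // (Disp = 0, Base = N) case at the end of this block.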
+    if (Flags & PPC::MOF_RPlusSImm16) {
+      SDValue Op0 = N.getOperand(0);
+      SDValue Op1 = N.getOperand(1);
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+      int16_t Imm = CN->getAPIntValue().getZExtValue();
+      if (!Align || isAligned(*Align, Imm)) {
+        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
+        Base = Op0;
+        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
+          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+        }
+        break;
+      }
+    }
+    // This is a register plus the @lo relocation. The base is the register
+    // and the displacement is the global address.
+    else if (Flags & PPC::MOF_RPlusLo) {
+      Disp = N.getOperand(1).getOperand(0); // The global address.
+      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
+             Disp.getOpcode() == ISD::TargetConstantPool ||
+             Disp.getOpcode() == ISD::TargetJumpTable);
+      Base = N.getOperand(0);
+      break;
+    }
+    // This is a constant address at most 32 bits. The base will be
+    // zero or load-immediate-shifted and the displacement will be
+    // the low 16 bits of the address.
+    else if (Flags & PPC::MOF_AddrIsSImm32) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+      EVT CNType = CN->getValueType(0);
+      uint64_t CNImm = CN->getZExtValue();
+      // If this address fits entirely in a 16-bit sext immediate field, codegen
+      // this as "d, 0".
+      int16_t Imm;
+      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
+        Disp = DAG.getTargetConstant(Imm, DL, CNType);
+        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+                               CNType);
+        break;
+      }
+      // Handle 32-bit sext immediate with LIS + Addr mode.
+      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
+          (!Align || isAligned(*Align, CNImm))) {
+        int32_t Addr = (int32_t)CNImm;
+        // Otherwise, break this down into LIS + Disp.
+        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
+        Base =
+            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
+        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
+        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
+        break;
+      }
+    }
+    // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is
+    // not foldable.
+    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
+    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
+      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+    } else
+      Base = N;
+    break;
+  }
+  case PPC::AM_None:
+    break;
+  default: { // By default, X-Form is always available to be selected.
+    // When a frame index is not aligned, we also match by XForm.
+    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
+    Base = FI ? N : N.getOperand(1);
+    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+                                N.getValueType())
+              : N.getOperand(0);
+    break;
+  }
+  }
+  return Mode;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1ec78cb..8184357 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -671,6 +671,49 @@ namespace llvm {
     /// the number of bytes of each element [124] -> [bhw].
     SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
 
+    // Flags for computing the optimal addressing mode for loads and stores.
+    enum MemOpFlags {
+      MOF_None = 0,
+
+      // Extension mode for integer loads.
+      MOF_SExt = 1,
+      MOF_ZExt = 1 << 1,
+      MOF_NoExt = 1 << 2,
+
+      // Address computation flags.
+      MOF_NotAddNorCst = 1 << 5,      // Not const. or sum of ptr and scalar.
+      MOF_RPlusSImm16 = 1 << 6,       // Reg plus signed 16-bit constant.
+      MOF_RPlusLo = 1 << 7,           // Reg plus signed 16-bit relocation.
+      MOF_RPlusSImm16Mult4 = 1 << 8,  // Reg plus 16-bit signed multiple of 4.
+      MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
+      MOF_RPlusSImm34 = 1 << 10,      // Reg plus 34-bit signed constant.
+      MOF_RPlusR = 1 << 11,           // Sum of two variables.
+      MOF_PCRel = 1 << 12,            // PC-Relative relocation.
+      MOF_AddrIsSImm32 = 1 << 13,     // A simple 32-bit constant.
+
+      // The in-memory type.
+      MOF_SubWordInt = 1 << 15,
+      MOF_WordInt = 1 << 16,
+      MOF_DoubleWordInt = 1 << 17,
+      MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
+      MOF_Vector = 1 << 19,      // Vector types and quad precision scalars.
+      MOF_Vector256 = 1 << 20,
+
+      // Subtarget features.
+      MOF_SubtargetBeforeP9 = 1 << 22,
+      MOF_SubtargetP9 = 1 << 23,
+      MOF_SubtargetP10 = 1 << 24,
+      MOF_SubtargetSPE = 1 << 25
+    };
+
+    // The addressing modes for loads and stores.
+    enum AddrMode {
+      AM_None,
+      AM_DForm,
+      AM_DSForm,
+      AM_DQForm,
+      AM_XForm,
+    };
   } // end namespace PPC
 
   class PPCTargetLowering : public TargetLowering {
@@ -1041,6 +1084,18 @@ namespace llvm {
                                             unsigned JTI,
                                             MCContext &Ctx) const override;
 
+    /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
+    /// compute the address flags of the node, get the optimal address mode
+    /// based on the flags, and set the Base and Disp based on the address mode.
+    PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
+                                        SDValue &Disp, SDValue &Base,
+                                        SelectionDAG &DAG,
+                                        MaybeAlign Align) const;
+    /// SelectForceXFormMode - Given the specified address, force it to be
+    /// represented as an indexed [r+r] operation (an XForm instruction).
+    PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
+                                       SelectionDAG &DAG) const;
+
     /// Structure that collects some common arguments that get passed around
     /// between the functions for call lowering.
     struct CallFlags {
@@ -1083,6 +1138,10 @@ namespace llvm {
       }
     };
 
+    // Map that relates a set of common address flags to PPC addressing modes.
+    std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
+    void initializeAddrModeMap();
+
     bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                              SelectionDAG &DAG,
                              ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@@ -1314,6 +1373,17 @@ namespace llvm {
     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
     bool hasBitPreservingFPLogic(EVT VT) const override;
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
+    /// getAddrModeForFlags - Based on the set of address flags, select the
+    /// optimal instruction format to match by.
+    PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;
+
+    /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
+    /// the address flags of the load/store instruction that is to be matched.
+    /// The computed address flags are then matched against the entries in
+    /// AddrModesMap to determine the optimal load/store instruction format.
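+    /// For example, a zero-extending 4-byte load from (add $reg, 32) on a
+    /// pre-Power9 subtarget computes roughly MOF_ZExt | MOF_WordInt |
+    /// MOF_RPlusSImm16 (plus alignment and subtarget flags), which covers the
+    /// AM_DForm entry for LWZ/STW in AddrModesMap.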
+ unsigned computeMOFlags(const SDNode *Parent, SDValue N, + SelectionDAG &DAG) const; }; // end class PPCTargetLowering namespace PPC { diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 386d143..f8e9a4e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1062,21 +1062,21 @@ let PPC970_Unit = 2 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi16 iaddr:$src))]>, + [(set i64:$rD, (sextloadi16 DForm:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, - (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64, + (sextloadi32 DSForm:$src))]>, isPPC64, PPC970_DGroup_Cracked; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), "lhax $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi16 xaddr:$src))]>, + [(set i64:$rD, (sextloadi16 XForm:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", IIC_LdStLHA, - [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 XForm:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { @@ -1117,23 +1117,23 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi8 iaddr:$src))]>; + [(set i64:$rD, (zextloadi8 DForm:$src))]>; def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), "lhz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi16 iaddr:$src))]>; + [(set i64:$rD, (zextloadi16 DForm:$src))]>; def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), "lwz $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + [(set i64:$rD, (zextloadi32 DForm:$src))]>, isPPC64; def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi8 xaddr:$src))]>; + [(set i64:$rD, (zextloadi8 XForm:$src))]>; def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$rD), (ins memrr:$src), "lhzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi16 xaddr:$src))]>; + [(set i64:$rD, (zextloadi16 XForm:$src))]>; def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$rD), (ins memrr:$src), "lwzx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (zextloadi32 xaddr:$src))]>; + [(set i64:$rD, (zextloadi32 XForm:$src))]>; // Update forms. @@ -1178,7 +1178,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, - [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64; + [(set i64:$rD, (load DSForm:$src))]>, isPPC64; // The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). 
@@ -1201,10 +1201,10 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, - [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64; + [(set i64:$rD, (load XForm:$src))]>, isPPC64; def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", IIC_LdStLoad, - [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + [(set i64:$rD, (PPClbrx ForceXForm:$src, i64))]>, isPPC64; let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in { def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src), @@ -1380,38 +1380,38 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), "stb $rS, $src", IIC_LdStStore, - [(truncstorei8 i64:$rS, iaddr:$src)]>; + [(truncstorei8 i64:$rS, DForm:$src)]>; def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), "sth $rS, $src", IIC_LdStStore, - [(truncstorei16 i64:$rS, iaddr:$src)]>; + [(truncstorei16 i64:$rS, DForm:$src)]>; def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), "stw $rS, $src", IIC_LdStStore, - [(truncstorei32 i64:$rS, iaddr:$src)]>; + [(truncstorei32 i64:$rS, DForm:$src)]>; def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), "stbx $rS, $dst", IIC_LdStStore, - [(truncstorei8 i64:$rS, xaddr:$dst)]>, + [(truncstorei8 i64:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), "sthx $rS, $dst", IIC_LdStStore, - [(truncstorei16 i64:$rS, xaddr:$dst)]>, + [(truncstorei16 i64:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), "stwx $rS, $dst", IIC_LdStStore, - [(truncstorei32 i64:$rS, xaddr:$dst)]>, + [(truncstorei32 i64:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; } // Interpretation64Bit // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", IIC_LdStSTD, - [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64; + [(store i64:$rS, DSForm:$dst)]>, isPPC64; def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", IIC_LdStSTD, - [(store i64:$rS, xaddrX4:$dst)]>, isPPC64, + [(store i64:$rS, XForm:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), "stdbrx $rS, $dst", IIC_LdStStore, - [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, + [(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } @@ -1567,26 +1567,26 @@ def : Pat<(not i64:$in), (i64not $in)>; // Extending loads with i64 targets. 
-def : Pat<(zextloadi1 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(zextloadi1 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi1 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), - (LBZ8 iaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), - (LBZX8 xaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), - (LHZ8 iaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), - (LHZX8 xaddr:$src)>; -def : Pat<(extloadi32 iaddr:$src), - (LWZ8 iaddr:$src)>; -def : Pat<(extloadi32 xaddr:$src), - (LWZX8 xaddr:$src)>; +def : Pat<(zextloadi1 DForm:$src), + (LBZ8 DForm:$src)>; +def : Pat<(zextloadi1 XForm:$src), + (LBZX8 XForm:$src)>; +def : Pat<(extloadi1 DForm:$src), + (LBZ8 DForm:$src)>; +def : Pat<(extloadi1 XForm:$src), + (LBZX8 XForm:$src)>; +def : Pat<(extloadi8 DForm:$src), + (LBZ8 DForm:$src)>; +def : Pat<(extloadi8 XForm:$src), + (LBZX8 XForm:$src)>; +def : Pat<(extloadi16 DForm:$src), + (LHZ8 DForm:$src)>; +def : Pat<(extloadi16 XForm:$src), + (LHZX8 XForm:$src)>; +def : Pat<(extloadi32 DForm:$src), + (LWZ8 DForm:$src)>; +def : Pat<(extloadi32 XForm:$src), + (LWZX8 XForm:$src)>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 6-bit and 7-bit shift @@ -1640,15 +1640,6 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), def : Pat<(i64 (PPCtoc_entry tglobaltlsaddr:$disp, i64:$reg)), (i64 (LDtoc tglobaltlsaddr:$disp, i64:$reg))>; -// Patterns to match r+r indexed loads and stores for -// addresses without at least 4-byte alignment. -def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)), - (LWAX xoaddr:$src)>; -def : Pat<(i64 (NonDSFormLoad xoaddr:$src)), - (LDX xoaddr:$src)>; -def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst), - (STDX $rS, xoaddr:$dst)>; - // 64-bits atomic loads and stores def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>; def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 6f2313f..6dfa495 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -411,46 +411,46 @@ let hasSideEffects = 1 in { let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. 
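+// Note: the VMX loads and stores below only have indexed ([r+r]) encodings,
+// so their patterns use ForceXForm, which always produces an [r+r] pair
+// instead of consulting the flag-based addressing-mode selection.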
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; + [(set v16i8:$vD, (int_ppc_altivec_lvebx ForceXForm:$src))]>; def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$vD), (ins memrr:$src), "lvehx $vD, $src", IIC_LdStLoad, - [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; + [(set v8i16:$vD, (int_ppc_altivec_lvehx ForceXForm:$src))]>; def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$vD), (ins memrr:$src), "lvewx $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvewx ForceXForm:$src))]>; def LVX : XForm_1_memOp<31, 103, (outs vrrc:$vD), (ins memrr:$src), "lvx $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvx ForceXForm:$src))]>; def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$vD), (ins memrr:$src), "lvxl $vD, $src", IIC_LdStLoad, - [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvxl ForceXForm:$src))]>; } def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$vD), (ins memrr:$src), "lvsl $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsl ForceXForm:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$vD), (ins memrr:$src), "lvsr $vD, $src", IIC_LdStLoad, - [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsr ForceXForm:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores. def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), "stvebx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvebx v16i8:$rS, ForceXForm:$dst)]>; def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), "stvehx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvehx v8i16:$rS, ForceXForm:$dst)]>; def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), "stvewx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvewx v4i32:$rS, ForceXForm:$dst)]>; def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), "stvx $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvx v4i32:$rS, ForceXForm:$dst)]>; def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), "stvxl $rS, $dst", IIC_LdStStore, - [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvxl v4i32:$rS, ForceXForm:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. @@ -894,11 +894,11 @@ def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>; def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>; // Loads. -def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; +def : Pat<(v4i32 (load ForceXForm:$src)), (LVX ForceXForm:$src)>; // Stores. -def : Pat<(store v4i32:$rS, xoaddr:$dst), - (STVX $rS, xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, ForceXForm:$dst), + (STVX $rS, ForceXForm:$dst)>; // Bit conversions. 
 def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4016345..709b6e6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1143,6 +1143,13 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", []>;
 // PC Relative Address
 def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", []>;
 
+// Load and Store Instruction Selection addressing modes.
+def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
+def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
+def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
+def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
+def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.
 def In32BitMode : Predicate<"!Subtarget->isPPC64()">;
@@ -2221,25 +2228,25 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
 let PPC970_Unit = 2 in {
 def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
                   "lbz $rD, $src", IIC_LdStLoad,
-                  [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
+                  [(set i32:$rD, (zextloadi8 DForm:$src))]>;
 def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
                   "lha $rD, $src", IIC_LdStLHA,
-                  [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
+                  [(set i32:$rD, (sextloadi16 DForm:$src))]>,
                   PPC970_DGroup_Cracked;
 def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
                   "lhz $rD, $src", IIC_LdStLoad,
-                  [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
+                  [(set i32:$rD, (zextloadi16 DForm:$src))]>;
 def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
                   "lwz $rD, $src", IIC_LdStLoad,
-                  [(set i32:$rD, (load iaddr:$src))]>;
+                  [(set i32:$rD, (load DForm:$src))]>;
 let Predicates = [HasFPU] in {
 def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
                   "lfs $rD, $src", IIC_LdStLFD,
-                  [(set f32:$rD, (load iaddr:$src))]>;
+                  [(set f32:$rD, (load DForm:$src))]>;
 def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
                   "lfd $rD, $src", IIC_LdStLFD,
-                  [(set f64:$rD, (load iaddr:$src))]>;
+                  [(set f64:$rD, (load DForm:$src))]>;
 }
 
@@ -2324,17 +2331,17 @@ def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
 let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
 def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src),
                          "lbzx $rD, $src", IIC_LdStLoad,
-                         [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
+                         [(set i32:$rD, (zextloadi8 XForm:$src))]>;
 def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src),
                          "lhax $rD, $src", IIC_LdStLHA,
-                         [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
+                         [(set i32:$rD, (sextloadi16 XForm:$src))]>,
                          PPC970_DGroup_Cracked;
 def LHZX : XForm_1_memOp<31, 279, (outs gprc:$rD), (ins memrr:$src),
                          "lhzx $rD, $src", IIC_LdStLoad,
-                         [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
+                         [(set i32:$rD, (zextloadi16 XForm:$src))]>;
 def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src),
                          "lwzx $rD, $src", IIC_LdStLoad,
-                         [(set i32:$rD, (load xaddr:$src))]>;
+                         [(set i32:$rD, (load XForm:$src))]>;
 def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src),
                           "lhbrx $rD, $src", IIC_LdStLoad,
                           [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
@@ -2345,10 +2352,10 @@ def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$rD), (ins memrr:$src),
 let Predicates = [HasFPU] in {
 def LFSX   : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src),
                             "lfsx $frD, $src", IIC_LdStLFD,
-                            [(set f32:$frD, (load xaddr:$src))]>;
+                            [(set f32:$frD, (load XForm:$src))]>;
 def LFDX   : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src),
                             "lfdx $frD, $src", IIC_LdStLFD,
-                            [(set f64:$frD, (load xaddr:$src))]>;
+                            [(set f64:$frD, (load
XForm:$src))]>; def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src), "lfiwax $frD, $src", IIC_LdStLFD, @@ -2372,20 +2379,20 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst), "stb $rS, $dst", IIC_LdStStore, - [(truncstorei8 i32:$rS, iaddr:$dst)]>; + [(truncstorei8 i32:$rS, DForm:$dst)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst), "sth $rS, $dst", IIC_LdStStore, - [(truncstorei16 i32:$rS, iaddr:$dst)]>; + [(truncstorei16 i32:$rS, DForm:$dst)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst), "stw $rS, $dst", IIC_LdStStore, - [(store i32:$rS, iaddr:$dst)]>; + [(store i32:$rS, DForm:$dst)]>; let Predicates = [HasFPU] in { def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", IIC_LdStSTFD, - [(store f32:$rS, iaddr:$dst)]>; + [(store f32:$rS, DForm:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), "stfd $rS, $dst", IIC_LdStSTFD, - [(store f64:$rS, iaddr:$dst)]>; + [(store f64:$rS, DForm:$dst)]>; } } @@ -2428,15 +2435,15 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), let PPC970_Unit = 2 in { def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst), "stbx $rS, $dst", IIC_LdStStore, - [(truncstorei8 i32:$rS, xaddr:$dst)]>, + [(truncstorei8 i32:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst), "sthx $rS, $dst", IIC_LdStStore, - [(truncstorei16 i32:$rS, xaddr:$dst)]>, + [(truncstorei16 i32:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst), "stwx $rS, $dst", IIC_LdStStore, - [(store i32:$rS, xaddr:$dst)]>, + [(store i32:$rS, XForm:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst), @@ -2455,10 +2462,10 @@ def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", IIC_LdStSTFD, - [(store f32:$frS, xaddr:$dst)]>; + [(store f32:$frS, XForm:$dst)]>; def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), "stfdx $frS, $dst", IIC_LdStSTFD, - [(store f64:$frS, xaddr:$dst)]>; + [(store f64:$frS, XForm:$dst)]>; } } @@ -3558,27 +3565,27 @@ def : Pat<(srl i32:$rS, i32:$rB), def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; -def : Pat<(i32 (zextloadi1 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (zextloadi1 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi1 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (extloadi1 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi8 iaddr:$src)), - (LBZ iaddr:$src)>; -def : Pat<(i32 (extloadi8 xaddr:$src)), - (LBZX xaddr:$src)>; -def : Pat<(i32 (extloadi16 iaddr:$src)), - (LHZ iaddr:$src)>; -def : Pat<(i32 (extloadi16 xaddr:$src)), - (LHZX xaddr:$src)>; +def : Pat<(i32 (zextloadi1 DForm:$src)), + (LBZ DForm:$src)>; +def : Pat<(i32 (zextloadi1 XForm:$src)), + (LBZX XForm:$src)>; +def : Pat<(i32 (extloadi1 DForm:$src)), + (LBZ DForm:$src)>; +def : Pat<(i32 (extloadi1 XForm:$src)), + (LBZX XForm:$src)>; +def : Pat<(i32 (extloadi8 DForm:$src)), + (LBZ DForm:$src)>; +def : Pat<(i32 (extloadi8 XForm:$src)), + (LBZX XForm:$src)>; +def : Pat<(i32 (extloadi16 DForm:$src)), + (LHZ DForm:$src)>; +def : Pat<(i32 (extloadi16 XForm:$src)), + (LHZX XForm:$src)>; let Predicates = [HasFPU] in { -def : Pat<(f64 (extloadf32 iaddr:$src)), - 
(COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
-          (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 DForm:$src)),
+          (COPY_TO_REGCLASS (LFS DForm:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 XForm:$src)),
+          (COPY_TO_REGCLASS (LFSX XForm:$src), F8RC)>;
 def : Pat<(f64 (any_fpextend f32:$src)),
           (COPY_TO_REGCLASS $src, F8RC)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index a374c5f..7c50d4b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2539,14 +2539,14 @@ let Predicates = [IsISA3_1] in {
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
 
-  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
-  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
-             (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 8)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 16)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 32)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRWX ForceXForm:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 64)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRDX ForceXForm:$src), VRRC))>;
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
@@ -2564,23 +2564,23 @@ let Predicates = [IsISA3_1, HasVSX] in {
 let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
   // Store element 0 of a VSX register to memory
-  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
-            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
-  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
-            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
-  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
-            (STXVRWX $src, xoaddr:$dst)>;
-  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
-            (STXVRWX $src, xoaddr:$dst)>;
-  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
-            (STXVRDX $src, xoaddr:$dst)>;
-  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
-            (STXVRDX $src, xoaddr:$dst)>;
+  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), ForceXForm:$dst),
+            (STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), ForceXForm:$dst)>;
+  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), ForceXForm:$dst),
+            (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), ForceXForm:$dst)>;
+  def : Pat<(store (i32 (extractelt v4i32:$src, 0)), ForceXForm:$dst),
+            (STXVRWX $src, ForceXForm:$dst)>;
+  def : Pat<(store (f32 (extractelt v4f32:$src, 0)), ForceXForm:$dst),
+            (STXVRWX $src, ForceXForm:$dst)>;
+  def : Pat<(store (i64 (extractelt v2i64:$src, 0)), ForceXForm:$dst),
+            (STXVRDX $src, ForceXForm:$dst)>;
+  def : Pat<(store (f64 (extractelt v2f64:$src, 0)), ForceXForm:$dst),
+            (STXVRDX $src, ForceXForm:$dst)>;
 
   // Load element 0 of a VSX register from memory
-  def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 xoaddr:$src)))),
-            (v8i16 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VSRC))>;
-  def
: Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 xoaddr:$src)))), - (v16i8 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VSRC))>; + def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 ForceXForm:$src)))), + (v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>; + def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))), + (v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>; } // FIXME: The swap is overkill when the shift amount is a constant. diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 9b87efd..89c52fd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -316,13 +316,13 @@ let hasSideEffects = 0 in { let CodeSize = 3 in def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#XFLOADf64", - [(set f64:$XT, (load xoaddr:$src))]>; + [(set f64:$XT, (load ForceXForm:$src))]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVD2X : XX1Form_memOp<31, 844, (outs vsrc:$XT), (ins memrr:$src), "lxvd2x $XT, $src", IIC_LdStLFD, - [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x ForceXForm:$src))]>; def LXVDSX : XX1Form_memOp<31, 332, (outs vsrc:$XT), (ins memrr:$src), @@ -347,7 +347,7 @@ let hasSideEffects = 0 in { let CodeSize = 3 in def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#XFSTOREf64", - [(store f64:$XT, xoaddr:$dst)]>; + [(store f64:$XT, ForceXForm:$dst)]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // The behaviour of this instruction is endianness-specific so we provide no @@ -1128,15 +1128,15 @@ let Predicates = [HasVSX, HasP8Vector] in { let CodeSize = 3 in def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), "#XFLOADf32", - [(set f32:$XT, (load xoaddr:$src))]>; + [(set f32:$XT, (load ForceXForm:$src))]>; // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWAX", - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + [(set f64:$XT, (PPClfiwax ForceXForm:$src))]>; // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWZX", - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + [(set f64:$XT, (PPClfiwzx ForceXForm:$src))]>; } // mayLoad // VSX scalar stores introduced in ISA 2.07 @@ -1151,11 +1151,11 @@ let Predicates = [HasVSX, HasP8Vector] in { let CodeSize = 3 in def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), "#XFSTOREf32", - [(store f32:$XT, xoaddr:$dst)]>; + [(store f32:$XT, ForceXForm:$dst)]>; // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#STIWX", - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + [(PPCstfiwx f64:$XT, ForceXForm:$dst)]>; } // mayStore // VSX Elementary Scalar FP arithmetic (SP) @@ -1682,9 +1682,9 @@ let Predicates = [HasVSX, HasP9Vector] in { // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, - [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; + [(set f64:$XT, (PPClxsizx ForceXForm:$src, 1))]>; def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, - [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>; + [(set f64:$XT, (PPClxsizx ForceXForm:$src, 2))]>; // Load Vector Halfword*8/Byte*16 Indexed def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; @@ -1692,7 +1692,7 @@ let Predicates = [HasVSX, HasP9Vector] in { // Load Vector 
Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xaddrX16:$src))]>; + [(set v2f64:$XT, (load XForm:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, @@ -1720,9 +1720,9 @@ let Predicates = [HasVSX, HasP9Vector] in { // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, - [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; + [(PPCstxsix f64:$XT, ForceXForm:$dst, 1)]>; def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, - [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; + [(PPCstxsix f64:$XT, ForceXForm:$dst, 2)]>; let isCodeGenOnly = 1 in { def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>; def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>; @@ -1734,7 +1734,7 @@ let Predicates = [HasVSX, HasP9Vector] in { // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xaddrX16:$dst)]>; + [(store v2f64:$XT, XForm:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form_memOp<31, 397, (outs), @@ -1751,16 +1751,16 @@ let Predicates = [HasVSX, HasP9Vector] in { def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load iaddrX4:$src))]>; + [(set f32:$XT, (load DSForm:$src))]>; def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load iaddrX4:$src))]>; + [(set f64:$XT, (load DSForm:$src))]>; def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, iaddrX4:$dst)]>; + [(store f32:$XT, DSForm:$dst)]>; def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, iaddrX4:$dst)]>; + [(store f64:$XT, DSForm:$dst)]>; let mayStore = 1 in { def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), @@ -1831,19 +1831,19 @@ def FpMinMax { } def ScalarLoads { - dag Li8 = (i32 (extloadi8 xoaddr:$src)); - dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); - dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); - dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); - dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); - - dag Li16 = (i32 (extloadi16 xoaddr:$src)); - dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); - dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); - dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); - dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); - - dag Li32 = (i32 (load xoaddr:$src)); + dag Li8 = (i32 (extloadi8 ForceXForm:$src)); + dag ZELi8 = (i32 (zextloadi8 ForceXForm:$src)); + dag ZELi8i64 = (i64 (zextloadi8 ForceXForm:$src)); + dag SELi8 = (i32 (sext_inreg (extloadi8 ForceXForm:$src), i8)); + dag SELi8i64 = (i64 (sext_inreg (extloadi8 ForceXForm:$src), i8)); + + dag Li16 = (i32 (extloadi16 ForceXForm:$src)); + dag ZELi16 = (i32 (zextloadi16 ForceXForm:$src)); + dag ZELi16i64 = (i64 (zextloadi16 ForceXForm:$src)); + dag SELi16 = (i32 (sextloadi16 ForceXForm:$src)); + dag SELi16i64 = (i64 (sextloadi16 ForceXForm:$src)); + + dag Li32 = (i32 (load ForceXForm:$src)); } def DWToSPExtractConv { @@ -2294,22 +2294,22 @@ def WordToDWord { } def FltToIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 ForceXForm:$A))))); } def FltToUIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 ForceXForm:$A))))); } def 
FltToLongLoad { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ForceXForm:$A))))); } def FltToLongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 DSForm:$A))))); } def FltToULongLoad { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ForceXForm:$A))))); } def FltToULongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 DSForm:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A))))); @@ -2336,38 +2336,38 @@ def DblToULong { dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A)))); } def DblToIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ForceXForm:$A))))); } def DblToIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load DSForm:$A))))); } def DblToUIntLoad { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ForceXForm:$A))))); } def DblToUIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load DSForm:$A))))); } def DblToLongLoad { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load ForceXForm:$A))))); } def DblToULongLoad { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load ForceXForm:$A))))); } // FP load dags (for f32 -> v4f32) def LoadFP { - dag A = (f32 (load xoaddr:$A)); - dag B = (f32 (load xoaddr:$B)); - dag C = (f32 (load xoaddr:$C)); - dag D = (f32 (load xoaddr:$D)); + dag A = (f32 (load ForceXForm:$A)); + dag B = (f32 (load ForceXForm:$B)); + dag C = (f32 (load ForceXForm:$C)); + dag D = (f32 (load ForceXForm:$D)); } // FP merge dags (for f32 -> v4f32) def MrgFP { - dag LD32A = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64); - dag LD32B = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$B), sub_64); - dag LD32C = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$C), sub_64); - dag LD32D = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$D), sub_64); + dag LD32A = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64); + dag LD32B = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$B), sub_64); + dag LD32C = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$C), sub_64); + dag LD32D = (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$D), sub_64); dag AC = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), (SUBREG_TO_REG (i64 1), $C, sub_64), 0)); dag BD = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), @@ -2739,12 +2739,12 @@ def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), (f64 (XSMAXDP $A, $B))>; -def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst), + (STXVD2X $rS, ForceXForm:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, ForceXForm:$dst), + (STXVW4X 
$rS, ForceXForm:$dst)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; // Rounding for single precision. def : Pat<(f32 (any_fround f32:$S)), @@ -2790,18 +2790,18 @@ def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>; defm : ScalToVecWPermute< v4i32, FltToIntLoad.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, FltToUIntLoad.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; -def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), - (v2f64 (LXVDSX xoaddr:$A))>; -def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), - (v2i64 (LXVDSX xoaddr:$A))>; +def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)), + (v2f64 (LXVDSX ForceXForm:$A))>; +def : Pat<(v2i64 (PPCldsplat ForceXForm:$A)), + (v2i64 (LXVDSX ForceXForm:$A))>; // Build vectors of floating point converted to i64. def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -2812,10 +2812,10 @@ def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; defm : ScalToVecWPermute< v2i64, DblToLongLoad.A, - (XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>; + (XVCVDPSXDS (LXVDSX ForceXForm:$A)), (XVCVDPSXDS (LXVDSX ForceXForm:$A))>; defm : ScalToVecWPermute< v2i64, DblToULongLoad.A, - (XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>; + (XVCVDPUXDS (LXVDSX ForceXForm:$A)), (XVCVDPUXDS (LXVDSX ForceXForm:$A))>; } // HasVSX // Any big endian VSX subtarget. 
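The PPCldsplat and DblToLongLoad/DblToULongLoad hunks above only swap the address selector to ForceXForm; the load-splat-convert idiom they match is unchanged. As a rough source-level sketch of that idiom (the typedef and function are illustrative only and assume a powerpc64le compiler with VSX; the backend's exact schedule may differ), this is the shape that maps to one lxvdsx plus a single xvcvdpsxds:

    #include <stdint.h>
    typedef int64_t v2i64 __attribute__((vector_size(16)));

    // Both lanes are built from the same fp-to-sint of one loaded double, so
    // one load-and-splat (lxvdsx) followed by one vector convert suffices.
    v2i64 splat_convert(const double *p) {
      int64_t n = (int64_t)*p;
      return (v2i64){n, n};
    }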
@@ -2943,14 +2943,14 @@ def : Pat<(f64 (extractelt v2f64:$S, 0)), def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; -def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; -def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(v2f64 (PPCld_vec_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; +def : Pat<(PPCst_vec_be v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; +def : Pat<(v4f32 (PPCld_vec_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>; +def : Pat<(PPCst_vec_be v4f32:$rS, ForceXForm:$dst), (STXVW4X $rS, ForceXForm:$dst)>; +def : Pat<(v2i64 (PPCld_vec_be ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; +def : Pat<(PPCst_vec_be v2i64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; +def : Pat<(v4i32 (PPCld_vec_be ForceXForm:$src)), (LXVW4X ForceXForm:$src)>; +def : Pat<(PPCst_vec_be v4i32:$rS, ForceXForm:$dst), (STXVW4X $rS, ForceXForm:$dst)>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), @@ -3059,30 +3059,30 @@ def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)), // Any pre-Power9 VSX subtarget. let Predicates = [HasVSX, NoP9Vector] in { def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), ForceXForm:$dst)>; // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
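The comment above covers the DblToIntLoad/DblToUIntLoad entries: the double is loaded with an X-Form VSX load, converted to a word, and splatted to all four lanes. A comparable sketch (again illustrative, assuming a pre-Power9 VSX target; the instruction choice is up to the backend):

    #include <stdint.h>
    typedef int32_t v4i32 __attribute__((vector_size(16)));

    // Matches the shape selected to an X-Form f64 load (XFLOADf64), then
    // xscvdpsxws, then an xxspltw of the converted word.
    v4i32 splat_d2i(const double *p) {
      int32_t n = (int32_t)*p;
      return (v4i32){n, n, n, n};
    }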
defm : ScalToVecWPermute< v4i32, DblToIntLoad.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 ForceXForm:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 ForceXForm:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoad.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 ForceXForm:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 ForceXForm:$A)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToLongLoad.A, - (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), - (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToULongLoad.A, - (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), - (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), 0), + (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$A), VSFRC)), sub_64)>; } // HasVSX, NoP9Vector @@ -3090,50 +3090,50 @@ defm : ScalToVecWPermute< let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in { // Load-and-splat using only X-Form VSX loads. defm : ScalToVecWPermute< - v2i64, (i64 (load xoaddr:$src)), - (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; + v2i64, (i64 (load ForceXForm:$src)), + (XXPERMDIs (XFLOADf64 ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< - v2f64, (f64 (load xoaddr:$src)), - (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; + v2f64, (f64 (load ForceXForm:$src)), + (XXPERMDIs (XFLOADf64 ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>; } // HasVSX, NoP9Vector, IsLittleEndian // Any VSX subtarget that only has loads and stores that load in big endian // order regardless of endianness. This is really pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { - def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v2f64 (PPClxvd2x ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; // Stores. - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst), + (STXVD2X $rS, ForceXForm:$dst)>; + def : Pat<(PPCstxvd2x v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; } // HasVSX, HasOnlySwappingMemOps // Big endian VSX subtarget that only has loads and stores that always // load in big endian order. Really big endian pre-Power9 subtargets. 
let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in { - def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; - def : Pat<(v2i64 (scalar_to_vector (i64 (load xoaddr:$src)))), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; + def : Pat<(v2f64 (load ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; + def : Pat<(v2i64 (load ForceXForm:$src)), (LXVD2X ForceXForm:$src)>; + def : Pat<(v4i32 (load ForceXForm:$src)), (LXVW4X ForceXForm:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x ForceXForm:$src)), (LXVW4X ForceXForm:$src)>; + def : Pat<(store v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; + def : Pat<(store v2i64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; + def : Pat<(store v4i32:$XT, ForceXForm:$dst), (STXVW4X $XT, ForceXForm:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, ForceXForm:$dst), + (STXVW4X $rS, ForceXForm:$dst)>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load ForceXForm:$src)))), + (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>; } // HasVSX, HasOnlySwappingMemOps, IsBigEndian // Any Power8 VSX subtarget. let Predicates = [HasVSX, HasP8Vector] in { def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; -def : Pat<(f64 (extloadf32 xoaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; -def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), - (f32 (XFLOADf32 xoaddr:$src))>; +def : Pat<(f64 (extloadf32 ForceXForm:$src)), + (COPY_TO_REGCLASS (XFLOADf32 ForceXForm:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 ForceXForm:$src)))), + (f32 (XFLOADf32 ForceXForm:$src))>; def : Pat<(f64 (any_fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; @@ -3176,11 +3176,11 @@ def : Pat<(f32 (fneg f32:$S)), // Instructions for converting float to i32 feeding a store. def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 4), + (STIWX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 4), + (STIWX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>; def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), @@ -3259,18 +3259,18 @@ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), // LIWAX - This instruction is used for sign extending i32 -> i64. // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). 
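To make the LIWAX/LIWZX split above concrete: the two scalar_to_vector shapes differ only in how the loaded word is extended. A sketch assuming powerpc64 with VSX (the unused lane is really undef in the DAG; it is written as zero here only so the C is well defined, and actual codegen may therefore differ):

    #include <stdint.h>
    typedef int64_t v2i64 __attribute__((vector_size(16)));

    v2i64 from_sext(const int32_t *p) {   // sextloadi32 -> LIWAX
      return (v2i64){(int64_t)*p, 0};
    }
    v2i64 from_zext(const uint32_t *p) {  // zextloadi32 -> LIWZX
      return (v2i64){(int64_t)*p, 0};
    }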
-def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))), + (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))), + (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>; defm : ScalToVecWPermute< - v4i32, (i32 (load xoaddr:$src)), - (XXSLDWIs (LIWZX xoaddr:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4i32, (i32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< - v4f32, (f32 (load xoaddr:$src)), - (XXSLDWIs (LIWZX xoaddr:$src), 1), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4f32, (f32 (load ForceXForm:$src)), + (XXSLDWIs (LIWZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat; -def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; +def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; +def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; // Elements in a register on a BE system are in order <0, 1, 2, 3>. // The store instructions store the second word from the left. // So to align element zero, we need to modulo-left-shift by 3 words. // Similar logic applies for elements 2 and 3. foreach Idx = [ [0,3], [2,1], [3,2] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), ForceXForm:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + sub_64), ForceXForm:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), ForceXForm:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; + sub_64), ForceXForm:$src)>; } } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64 @@ -3346,24 +3346,24 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). 
defm : ScalToVecWPermute< - v2i64, (i64 (sextloadi32 xoaddr:$src)), - (XXPERMDIs (LIWAX xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>; + v2i64, (i64 (sextloadi32 ForceXForm:$src)), + (XXPERMDIs (LIWAX ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< - v2i64, (i64 (zextloadi32 xoaddr:$src)), - (XXPERMDIs (LIWZX xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v2i64, (i64 (zextloadi32 ForceXForm:$src)), + (XXPERMDIs (LIWZX ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< - v4i32, (i32 (load xoaddr:$src)), - (XXPERMDIs (LIWZX xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4i32, (i32 (load ForceXForm:$src)), + (XXPERMDIs (LIWZX ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< - v4f32, (f32 (load xoaddr:$src)), - (XXPERMDIs (LIWZX xoaddr:$src), 2), - (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; + v4f32, (f32 (load ForceXForm:$src)), + (XXPERMDIs (LIWZX ForceXForm:$src), 2), + (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>; def : Pat; -def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; +def : Pat<(store (i32 (extractelt v4i32:$A, 2)), ForceXForm:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; +def : Pat<(store (f32 (extractelt v4f32:$A, 2)), ForceXForm:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; // Elements in a register on a LE system are in order <3, 2, 1, 0>. // The store instructions store the second word from the left. // So to align element 3, we need to modulo-left-shift by 3 words. // Similar logic applies for elements 0 and 1. foreach Idx = [ [0,2], [1,1], [3,3] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), ForceXForm:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + sub_64), ForceXForm:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), ForceXForm:$src), (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; + sub_64), ForceXForm:$src)>; } } // HasVSX, HasP8Vector, IsLittleEndian // Big endian pre-Power9 VSX subtarget. 
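The two foreach lists above (big endian [0,3], [2,1], [3,2]; little endian [0,2], [1,1], [3,3]) fall out of the same arithmetic: STIWX stores word lane 1, the second word from the left, and XXSLDWI $A, $A, N rotates the register left by N words. A standalone sketch of the derivation (helper names are hypothetical):

    #include <cassert>

    // Word lane that holds vector element E on each endianness.
    int laneOf(int E, bool IsLittleEndian) {
      return IsLittleEndian ? 3 - E : E;
    }

    // XXSLDWI amount that moves element E into lane 1, the lane STIWX stores.
    int xxsldwiAmount(int E, bool IsLittleEndian) {
      return (laneOf(E, IsLittleEndian) - 1 + 4) % 4;
    }

    int main() {
      // Big endian list; element 1 is already in lane 1 and needs no shift.
      assert(xxsldwiAmount(0, false) == 3 && xxsldwiAmount(2, false) == 1);
      assert(xxsldwiAmount(3, false) == 2 && xxsldwiAmount(1, false) == 0);
      // Little endian list; element 2 is the one that needs no shift.
      assert(xxsldwiAmount(0, true) == 2 && xxsldwiAmount(1, true) == 1);
      assert(xxsldwiAmount(3, true) == 3 && xxsldwiAmount(2, true) == 0);
      return 0;
    }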
let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in { -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ForceXForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), + ForceXForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ForceXForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; + ForceXForm:$src)>; } // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64 // Little endian pre-Power9 VSX subtarget. let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in { -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), + ForceXForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ForceXForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + ForceXForm:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ForceXForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>; } // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian // Any VSX target with direct moves. @@ -3720,12 +3720,12 @@ def : Pat<(f128 (any_uint_to_fp (i64 (PPCmfvsr f64:$src)))), // Convert (Un)Signed Word -> QP. def : Pat<(f128 (any_sint_to_fp i32:$src)), (f128 (XSCVSDQP (MTVSRWA $src)))>; -def : Pat<(f128 (any_sint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; +def : Pat<(f128 (any_sint_to_fp (i32 (load ForceXForm:$src)))), + (f128 (XSCVSDQP (LIWAX ForceXForm:$src)))>; def : Pat<(f128 (any_uint_to_fp i32:$src)), (f128 (XSCVUDQP (MTVSRWZ $src)))>; -def : Pat<(f128 (any_uint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; +def : Pat<(f128 (any_uint_to_fp (i32 (load ForceXForm:$src)))), + (f128 (XSCVUDQP (LIWZX ForceXForm:$src)))>; // Pattern for matching Vector HP -> Vector SP intrinsic. 
Defined as a // separate pattern so that it can convert the input register class from @@ -3766,95 +3766,95 @@ def : Pat<(v1i128 (bswap v1i128 :$A)), (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; // D-Form Load/Store -def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), +def : Pat<(v4i32 (quadwOffsetLoad DQForm:$src)), (LXV memrix16:$src)>; +def : Pat<(v4f32 (quadwOffsetLoad DQForm:$src)), (LXV memrix16:$src)>; +def : Pat<(v2i64 (quadwOffsetLoad DQForm:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (quadwOffsetLoad DQForm:$src)), (LXV memrix16:$src)>; +def : Pat<(f128 (quadwOffsetLoad DQForm:$src)), (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x DQForm:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x DQForm:$src)), (LXV memrix16:$src)>; -def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), +def : Pat<(quadwOffsetStore v4f32:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v4i32:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v2f64:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore f128:$rS, DQForm:$dst), (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; -def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst), +def : Pat<(quadwOffsetStore v2i64:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst), +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, DQForm:$dst), (STXV $rS, memrix16:$dst)>; -def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; -def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)), - (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>; -def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst), - (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVX $rS, 
xoaddr:$dst)>; +def : Pat<(v2f64 (nonQuadwOffsetLoad ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(v2i64 (nonQuadwOffsetLoad ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(v4f32 (nonQuadwOffsetLoad ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(v4i32 (nonQuadwOffsetLoad ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x ForceXForm:$src)), (LXVX ForceXForm:$src)>; +def : Pat<(f128 (nonQuadwOffsetLoad ForceXForm:$src)), + (COPY_TO_REGCLASS (LXVX ForceXForm:$src), VRRC)>; +def : Pat<(nonQuadwOffsetStore f128:$rS, ForceXForm:$dst), + (STXVX (COPY_TO_REGCLASS $rS, VSRC), ForceXForm:$dst)>; +def : Pat<(nonQuadwOffsetStore v2f64:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; +def : Pat<(nonQuadwOffsetStore v2i64:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; +def : Pat<(nonQuadwOffsetStore v4f32:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; +def : Pat<(nonQuadwOffsetStore v4i32:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst), + (STXVX $rS, ForceXForm:$dst)>; // Build vectors from i8 loads defm : ScalToVecWPermute; + (VSPLTHs 3, (LXSIBZX ForceXForm:$src)), + (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute; + (XXSPLTWs (LXSIBZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute; + (XXPERMDIs (LXSIBZX ForceXForm:$src), 0), + (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< v4i32, ScalarLoads.SELi8, - (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1), - (SUBREG_TO_REG (i64 1), (VEXTSB2Ws (LXSIBZX xoaddr:$src)), sub_64)>; + (XXSPLTWs (VEXTSB2Ws (LXSIBZX ForceXForm:$src)), 1), + (SUBREG_TO_REG (i64 1), (VEXTSB2Ws (LXSIBZX ForceXForm:$src)), sub_64)>; defm : ScalToVecWPermute< v2i64, ScalarLoads.SELi8i64, - (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0), - (SUBREG_TO_REG (i64 1), (VEXTSB2Ds (LXSIBZX xoaddr:$src)), sub_64)>; + (XXPERMDIs (VEXTSB2Ds (LXSIBZX ForceXForm:$src)), 0), + (SUBREG_TO_REG (i64 1), (VEXTSB2Ds (LXSIBZX ForceXForm:$src)), sub_64)>; // Build vectors from i16 loads defm : ScalToVecWPermute< v4i32, ScalarLoads.ZELi16, - (XXSPLTWs (LXSIHZX xoaddr:$src), 1), - (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>; + (XXSPLTWs (LXSIHZX ForceXForm:$src), 1), + (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< v2i64, ScalarLoads.ZELi16i64, - (XXPERMDIs (LXSIHZX xoaddr:$src), 0), - (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>; + (XXPERMDIs (LXSIHZX ForceXForm:$src), 0), + (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>; defm : ScalToVecWPermute< v4i32, ScalarLoads.SELi16, - (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1), - (SUBREG_TO_REG (i64 1), (VEXTSH2Ws (LXSIHZX xoaddr:$src)), sub_64)>; + (XXSPLTWs (VEXTSH2Ws (LXSIHZX ForceXForm:$src)), 1), + (SUBREG_TO_REG (i64 1), (VEXTSH2Ws (LXSIHZX ForceXForm:$src)), sub_64)>; defm : ScalToVecWPermute< v2i64, ScalarLoads.SELi16i64, - (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0), - (SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX xoaddr:$src)), sub_64)>; + (XXPERMDIs (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), 0), + (SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), sub_64)>; // Load/convert and convert/store patterns for f16. 
-def : Pat<(f64 (extloadf16 xoaddr:$src)), - (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; -def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; -def : Pat<(f32 (extloadf16 xoaddr:$src)), - (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; -def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; +def : Pat<(f64 (extloadf16 ForceXForm:$src)), + (f64 (XSCVHPDP (LXSIHZX ForceXForm:$src)))>; +def : Pat<(truncstoref16 f64:$src, ForceXForm:$dst), + (STXSIHX (XSCVDPHP $src), ForceXForm:$dst)>; +def : Pat<(f32 (extloadf16 ForceXForm:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX ForceXForm:$src)), VSSRC))>; +def : Pat<(truncstoref16 f32:$src, ForceXForm:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), ForceXForm:$dst)>; def : Pat<(f64 (f16_to_fp i32:$A)), (f64 (XSCVHPDP (MTVSRWZ $A)))>; def : Pat<(f32 (f16_to_fp i32:$A)), @@ -3869,33 +3869,33 @@ def : Pat<(f64 (PPCVexts f64:$A, 1)), def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; -def : Pat<(f64 (extloadf32 iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>; -def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), - (f32 (DFLOADf32 iaddrX4:$src))>; +def : Pat<(f64 (extloadf32 DSForm:$src)), + (COPY_TO_REGCLASS (DFLOADf32 DSForm:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 DSForm:$src)))), + (f32 (DFLOADf32 DSForm:$src))>; -def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), - (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddr:$src), sub_64)>; -def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), - (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; +def : Pat<(v4f32 (PPCldvsxlh XForm:$src)), + (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>; +def : Pat<(v4f32 (PPCldvsxlh DSForm:$src)), + (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>; // Convert (Un)Signed DWord in memory -> QP -def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>; -def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>; -def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>; -def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>; +def : Pat<(f128 (sint_to_fp (i64 (load XForm:$src)))), + (f128 (XSCVSDQP (LXSDX XForm:$src)))>; +def : Pat<(f128 (sint_to_fp (i64 (load DSForm:$src)))), + (f128 (XSCVSDQP (LXSD DSForm:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load XForm:$src)))), + (f128 (XSCVUDQP (LXSDX XForm:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load DSForm:$src)))), + (f128 (XSCVUDQP (LXSD DSForm:$src)))>; // Convert Unsigned HWord in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)), - (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>; + (f128 (XSCVUDQP (LXSIHZX XForm:$src)))>; // Convert Unsigned Byte in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), - (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; + (f128 (XSCVUDQP (LXSIBZX ForceXForm:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; @@ -3908,65 +3908,65 @@ def : Pat<(i32 (any_fp_to_uint f128:$src)), // Instructions for store(fptosi). // The 8-byte version is repeated here due to availability of D-Form STXSD. 
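At the source level the patterns that follow are all stores of a converted value; the 8-byte case is listed twice so that either the indexed STXSDX or the displacement-based STXSD can be picked once the address shape is known. A sketch (assumes powerpc64le with Power9 enabled; the actual choice depends on the computed addressing mode):

    #include <stdint.h>

    void store_fptosi(double d, int64_t *p) {
      // xscvdpsxds to convert, then stxsd disp(r) when the displacement is a
      // signed 16-bit multiple of 4, or stxsdx when the address is reg+reg.
      *p = (int64_t)d;
    }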
def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), XForm:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - xaddrX4:$dst)>; + XForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), DSForm:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - iaddrX4:$dst)>; + DSForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), XForm:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), XForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), DSForm:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), DSForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 2), + (STXSIHX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 1), + (STXSIBX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>; // Instructions for store(fptoui). 
def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), XForm:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - xaddrX4:$dst)>; + XForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), DSForm:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - iaddrX4:$dst)>; + DSForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), XForm:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), XForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), DSForm:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), DSForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 2), + (STXSIHX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 1), + (STXSIBX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>; // Round & Convert QP -> DP/SP def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>; @@ -4001,36 +4001,36 @@ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; defm : ScalToVecWPermute< v4i32, FltToIntLoad.A, - (XVCVSPSXWS (LXVWSX xoaddr:$A)), - (XVCVSPSXWS (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64))>; + (XVCVSPSXWS (LXVWSX ForceXForm:$A)), + (XVCVSPSXWS (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64))>; defm : ScalToVecWPermute< v4i32, FltToUIntLoad.A, - (XVCVSPUXWS (LXVWSX xoaddr:$A)), - (XVCVSPUXWS (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64))>; + (XVCVSPUXWS (LXVWSX ForceXForm:$A)), + (XVCVSPUXWS (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$A), sub_64))>; defm : ScalToVecWPermute< v4i32, DblToIntLoadP9.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 DSForm:$A)), sub_64), 1), + 
(SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 DSForm:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoadP9.A, - (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1), - (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 DSForm:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 DSForm:$A)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToLongLoadP9.A, - (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), 0), (SUBREG_TO_REG (i64 1), - (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; + (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToULongLoadP9.A, - (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0), + (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), 0), (SUBREG_TO_REG (i64 1), - (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>; -def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), - (v4f32 (LXVWSX xoaddr:$A))>; -def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), - (v4i32 (LXVWSX xoaddr:$A))>; + (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 DSForm:$A), VSFRC)), sub_64)>; +def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)), + (v4f32 (LXVWSX ForceXForm:$A))>; +def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)), + (v4i32 (LXVWSX ForceXForm:$A))>; } // HasVSX, HasP9Vector // Any Power9 VSX subtarget with equivalent length but better Power10 VSX @@ -4044,13 +4044,13 @@ let Predicates = [HasVSX, HasP9Vector, NoP10Vector] in { // The NoP10Vector predicate excludes these patterns from Power10 VSX subtargets. 
defm : ScalToVecWPermute< v16i8, ScalarLoads.Li8, - (VSPLTBs 7, (LXSIBZX xoaddr:$src)), - (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>; + (VSPLTBs 7, (LXSIBZX ForceXForm:$src)), + (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>; // Build vectors from i16 loads defm : ScalToVecWPermute< v8i16, ScalarLoads.Li16, - (VSPLTHs 3, (LXSIHZX xoaddr:$src)), - (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>; + (VSPLTHs 3, (LXSIHZX ForceXForm:$src)), + (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>; } // HasVSX, HasP9Vector, NoP10Vector // Any big endian Power9 VSX subtarget @@ -4060,13 +4060,13 @@ let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in { // Build vectors from i8 loads defm : ScalToVecWPermute< v16i8, ScalarLoads.Li8, - (VSPLTBs 7, (LXSIBZX xoaddr:$src)), - (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>; + (VSPLTBs 7, (LXSIBZX ForceXForm:$src)), + (SUBREG_TO_REG (i64 1), (LXSIBZX ForceXForm:$src), sub_64)>; // Build vectors from i16 loads defm : ScalToVecWPermute< v8i16, ScalarLoads.Li16, - (VSPLTHs 3, (LXSIHZX xoaddr:$src)), - (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>; + (VSPLTHs 3, (LXSIHZX ForceXForm:$src)), + (SUBREG_TO_REG (i64 1), (LXSIHZX ForceXForm:$src), sub_64)>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; @@ -4102,89 +4102,89 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; // Scalar stores of i8 -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; -def : 
Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>; // Scalar stores of i16 -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; 
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>; } // HasVSX, HasP9Vector, IsBigEndian // Big endian 64Bit Power9 subtarget. 
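The rotate amounts in the scalar i8/i16 store patterns above (and in their little-endian counterparts further down) follow the same modulo scheme as the word-sized case earlier: STXSIBXv stores byte lane 7, STXSIHXv stores halfword lane 3, and VSLDOI $S, $S, N rotates left by N bytes. A sketch of the derivation (helper names are hypothetical):

    int vsldoiForByte(int E, bool IsLittleEndian) {
      int Lane = IsLittleEndian ? 15 - E : E;  // byte lane holding element E
      return (Lane - 7 + 16) % 16;             // rotate it into lane 7
    }

    int vsldoiForHalf(int E, bool IsLittleEndian) {
      int Lane = IsLittleEndian ? 7 - E : E;   // halfword lane holding element E
      return ((Lane - 3 + 8) % 8) * 2;         // VSLDOI amounts are in bytes
    }

For example, vsldoiForByte(0, false) is 9 and vsldoiForHalf(0, false) is 10, matching the VSLDOI 9 and VSLDOI 10 in the big-endian patterns above.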
let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in { -def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>; - -def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>; -def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), +def : Pat<(v2i64 (scalar_to_vector (i64 (load DSForm:$src)))), + (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (load XForm:$src)))), + (v2i64 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64))>; + +def : Pat<(v2f64 (scalar_to_vector (f64 (load DSForm:$src)))), + (v2f64 (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64))>; +def : Pat<(v2f64 (scalar_to_vector (f64 (load XForm:$src)))), + (v2f64 (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64))>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), XForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + sub_64), XForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), XForm:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), + sub_64), XForm:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), XForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), XForm:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), DSForm:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), + sub_64), DSForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), DSForm:$src), (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; -def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; -def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + sub_64), DSForm:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), DSForm:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), DSForm:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>; // (Un)Signed DWord vector extract -> QP def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), @@ -4280,105 +4280,105 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; -def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; -def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), - (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; +def : 
Pat<(v8i16 (PPCld_vec_be ForceXForm:$src)), + (COPY_TO_REGCLASS (LXVH8X ForceXForm:$src), VRRC)>; +def : Pat<(PPCst_vec_be v8i16:$rS, ForceXForm:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), ForceXForm:$dst)>; -def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; -def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), - (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; +def : Pat<(v16i8 (PPCld_vec_be ForceXForm:$src)), + (COPY_TO_REGCLASS (LXVB16X ForceXForm:$src), VRRC)>; +def : Pat<(PPCst_vec_be v16i8:$rS, ForceXForm:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), ForceXForm:$dst)>; // Scalar stores of i8 -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; -def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), ForceXForm:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), ForceXForm:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), ForceXForm:$dst), + (STXSIBXv 
(COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), ForceXForm:$dst),
+          (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>;
 // Scalar stores of i16
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
-def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
-          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), ForceXForm:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), ForceXForm:$dst),
+          (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), ForceXForm:$dst)>;
 defm : ScalToVecWPermute<
-  v2i64, (i64 (load iaddrX4:$src)),
-  (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2),
-  (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+  v2i64, (i64 (load DSForm:$src)),
+  (XXPERMDIs (DFLOADf64 DSForm:$src), 2),
+  (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>;
 defm : ScalToVecWPermute<
-  v2i64, (i64 (load xaddrX4:$src)),
-  (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2),
-  (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+  v2i64, (i64 (load XForm:$src)),
+  (XXPERMDIs (XFLOADf64 XForm:$src), 2),
+  (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>;
 defm : ScalToVecWPermute<
-  v2f64, (f64 (load iaddrX4:$src)),
-  (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2),
-  (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+  v2f64, (f64 (load DSForm:$src)),
+  (XXPERMDIs (DFLOADf64 DSForm:$src), 2),
+  (SUBREG_TO_REG (i64 1), (DFLOADf64 DSForm:$src), sub_64)>;
 defm : ScalToVecWPermute<
-  v2f64, (f64 (load xaddrX4:$src)),
-  (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2),
-  (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+  v2f64, (f64 (load XForm:$src)),
+  (XXPERMDIs (XFLOADf64 XForm:$src), 2),
+  (SUBREG_TO_REG (i64 1), (XFLOADf64 XForm:$src), sub_64)>;
 
-def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), XForm:$src),
           (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
-                                      sub_64), xaddrX4:$src)>;
-def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+                                      sub_64), XForm:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), XForm:$src),
           (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
-                                      sub_64), xaddrX4:$src)>;
-def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
-          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
-def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
-          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
-def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+                                      sub_64), XForm:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), XForm:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), XForm:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), XForm:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), DSForm:$src),
           (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
-                                      sub_64), iaddrX4:$src)>;
-def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+                                      sub_64), DSForm:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), DSForm:$src),
           (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
-          iaddrX4:$src)>;
-def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
-          (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
-def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
-          (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+          DSForm:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), DSForm:$src),
+          (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), DSForm:$src),
+          (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), DSForm:$src)>;
 
 // (Un)Signed DWord vector extract -> QP
 def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
diff --git a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
index d6ed3dc..0591253 100644
--- a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
@@ -9,7 +9,7 @@ define dso_local void @AlignDSForm() local_unnamed_addr {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r3, r2, best8x8mode@toc@ha
 ; CHECK-NEXT:    addi r3, r3, best8x8mode@toc@l
-; CHECK-NEXT:    ldx r3, 0, r3
+; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    std r3, 0(r3)
 entry:
   %0 = load <4 x i16>, <4 x i16>* bitcast ([4 x i16]* @best8x8mode to <4 x i16>*), align 2
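
To make the mechanics behind these pattern and test changes concrete, below is a
minimal, self-contained C++ sketch of the flag-based lookup the new
infrastructure performs. It is a simplified stand-in, not the actual LLVM code:
the real MemOpFlags/AddrMode enums in PPCISelLowering.h and the map built by
initializeAddrModeMap() carry many more distinctions (value type, extension
kind, subtarget features), and the names computeFlags/AddrModeTable here are
illustrative only.

    #include <cstdint>
    #include <utility>
    #include <vector>

    enum AddrMode { AM_DForm, AM_DSForm, AM_DQForm, AM_XForm };

    // Illustrative flag bits only; the real MemOpFlags enum is much richer.
    enum MemOpFlags : uint64_t {
      MOF_SignedImm16  = 1ULL << 0, // offset fits in a signed 16-bit field
      MOF_MultipleOf4  = 1ULL << 1, // offset is a multiple of 4
      MOF_MultipleOf16 = 1ULL << 2, // offset is a multiple of 16
      MOF_RegPlusReg   = 1ULL << 3, // address is base register + index register
      MOF_Vector       = 1ULL << 4, // the access is a vector load/store
    };

    // Ordered table, most-constrained encoding first, standing in for the map
    // built by initializeAddrModeMap(): each entry lists the flags a memory op
    // must have for that instruction format to be legal.
    static const std::vector<std::pair<AddrMode, uint64_t>> AddrModeTable = {
        {AM_DQForm, MOF_SignedImm16 | MOF_MultipleOf16 | MOF_Vector},
        {AM_DSForm, MOF_SignedImm16 | MOF_MultipleOf4},
        {AM_DForm,  MOF_SignedImm16},
    };

    // Toy analogue of computeMOFlags() for an address that is either
    // base + constant offset or base + index register.
    uint64_t computeFlags(int64_t Offset, bool HasIndexReg, bool IsVector) {
      uint64_t Flags = IsVector ? uint64_t(MOF_Vector) : 0;
      if (HasIndexReg)
        return Flags | MOF_RegPlusReg;
      if (Offset >= -32768 && Offset <= 32767) {
        Flags |= MOF_SignedImm16;
        if (Offset % 4 == 0)
          Flags |= MOF_MultipleOf4;
        if (Offset % 16 == 0)
          Flags |= MOF_MultipleOf16;
      }
      return Flags;
    }

    // Toy analogue of getAddrModeForFlags(): the first entry whose required
    // flags are all present wins; the indexed X-Form serves as the fallback,
    // since any address can be materialized into a pair of registers.
    AddrMode getAddrModeForFlags(uint64_t Flags) {
      for (const auto &Entry : AddrModeTable)
        if ((Flags & Entry.second) == Entry.second)
          return Entry.first;
      return AM_XForm;
    }

Under this toy model, computeFlags(0, /*HasIndexReg=*/false, /*IsVector=*/false)
yields MOF_SignedImm16 | MOF_MultipleOf4 | MOF_MultipleOf16, and
getAddrModeForFlags() returns AM_DSForm. That is consistent with the
p9-dform-load-alignment.ll change above, where the indexed ldx becomes the
DS-Form ld r3, 0(r3): the known zero displacement satisfies the multiple-of-4
constraint even though the IR-level alignment of the load is only 2.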