setOperationAction(ISD::LOAD, MVT::v8f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
setOperationAction(ISD::LOAD, MVT::v4i64, Custom);
- // 128-bit non-temporal loads can be lowered to LDNP using custom lowering.
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2i64, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i16, Custom);
- setOperationAction(ISD::LOAD, MVT::v16i8, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::LDNP)
- MAKE_CASE(AArch64ISD::LDNP128)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
- // Handle lowering 128-bit non temporal loads for little-endian targets.
- EVT MemVT = LoadNode->getMemoryVT();
- if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
- MemVT.getSizeInBits() == 128 &&
- (MemVT.getScalarSizeInBits() == 8u ||
- MemVT.getScalarSizeInBits() == 16u ||
- MemVT.getScalarSizeInBits() == 32u ||
- MemVT.getScalarSizeInBits() == 64u)) {
-
- SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::LDNP128, DL,
- DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MVT::Other}),
- {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
- LoadNode->getMemOperand());
-
- SDValue P = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), MemVT,
- Result.getValue(0), Result.getValue(1));
- return DAG.getMergeValues({P, Result.getValue(2) /* Chain */}, DL);
- }
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
// Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
+ assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
- if ((VT != MVT::v4i16 && VT != MVT::v4i32) ||
- LoadNode->getMemoryVT() != MVT::v4i8)
+ if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
LDP,
LDNP,
- LDNP128,
STP,
STNP,
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64ldnp128 : SDTypeProfile<2, 1, [SDTCisVT<0, v2i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def AArch64ldnp128 : SDNode<"AArch64ISD::LDNP128", SDT_AArch64ldnp128, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
(LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
-
-def : Pat<(AArch64ldnp128 (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
- (LDNPDi GPR64sp:$Rn, simm7s8:$offset)>;
//---
// (register offset)
//---
define <4 x i32> @test_ldnp_v4i32(<4 x i32>* %A) {
; CHECK-LABEL: test_ldnp_v4i32:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldnp d0, d1, [x0]
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v4i32:
define <4 x float> @test_ldnp_v4f32(<4 x float>* %A) {
; CHECK-LABEL: test_ldnp_v4f32:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldnp d0, d1, [x0]
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v4f32:
define <8 x i16> @test_ldnp_v8i16(<8 x i16>* %A) {
; CHECK-LABEL: test_ldnp_v8i16:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldnp d0, d1, [x0]
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v8i16:
define <16 x i8> @test_ldnp_v16i8(<16 x i8>* %A) {
; CHECK-LABEL: test_ldnp_v16i8:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldnp d0, d1, [x0]
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v16i8:
define <2 x double> @test_ldnp_v2f64(<2 x double>* %A) {
; CHECK-LABEL: test_ldnp_v2f64:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldnp d0, d1, [x0]
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v2f64: