// fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
- if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
- Splat && N1.hasOneUse()) {
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
+ N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
+ auto NewLoad = DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ bool LoadHasOtherUsers = !N0.hasOneUse();
+ CombineTo(N, NewLoad);
+ if (LoadHasOtherUsers)
+ CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
+ return SDValue(N, 0);
}
}
}
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: cmphs p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, #0
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
entry:
; CHECK-LE-NEXT: vmov r0, r1, d8
; CHECK-LE-NEXT: vmov r2, r3, d9
; CHECK-LE-NEXT: bl foo
-; CHECK-LE-NEXT: vmovlb.u16 q0, q4
+; CHECK-LE-NEXT: vmov q0, q4
; CHECK-LE-NEXT: vpop {d8, d9}
; CHECK-LE-NEXT: pop {r7, pc}
;
; CHECK-BE-NEXT: vpush {d8, d9}
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
-; CHECK-BE-NEXT: vldrht.u32 q4, [r0]
-; CHECK-BE-NEXT: vrev64.32 q0, q4
-; CHECK-BE-NEXT: vmov r1, r0, d0
-; CHECK-BE-NEXT: vmov r3, r2, d1
+; CHECK-BE-NEXT: vldrht.u32 q0, [r0]
+; CHECK-BE-NEXT: vrev64.32 q4, q0
+; CHECK-BE-NEXT: vmov r1, r0, d8
+; CHECK-BE-NEXT: vmov r3, r2, d9
; CHECK-BE-NEXT: bl foo
-; CHECK-BE-NEXT: vmovlb.u16 q1, q4
-; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: vmov q0, q4
; CHECK-BE-NEXT: vpop {d8, d9}
; CHECK-BE-NEXT: pop {r7, pc}
entry: