return Res;
}
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
SDValue Src = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+
+ // Under MVE, fold a bitcast of a VDUP into a VDUP of the destination type
+ // when the element sizes match, e.g.
+ // v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32).
+ if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
+ return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
+ }
// We may have a bitcast of something that has already had this bitcast
// combine performed on it, so skip past any VECTOR_REG_CASTs.
// Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
// would be generated is at least the width of the element type.
EVT SrcVT = Src.getValueType();
- EVT DstVT = N->getValueType(0);
if ((Src.getOpcode() == ARMISD::VMOVIMM ||
Src.getOpcode() == ARMISD::VMVNIMM ||
Src.getOpcode() == ARMISD::VMOVFPIMM) &&
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::BITCAST:
- return PerformBITCASTCombine(N, DCI.DAG);
+ return PerformBITCASTCombine(N, DCI.DAG, Subtarget);
case ARMISD::PREDICATE_CAST:
return PerformPREDICATE_CASTCombine(N, DCI);
case ARMISD::VECTOR_REG_CAST:
; CHECK-LABEL: vaddqr_v4f32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
-; CHECK-NEXT: vdup.32 q1, r0
-; CHECK-NEXT: vadd.f32 q0, q0, q1
+; CHECK-NEXT: vadd.f32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
%src2bc = bitcast float %src2 to i32
; CHECK-LABEL: vaddqr_v8f16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r0, [r0]
-; CHECK-NEXT: vdup.16 q1, r0
-; CHECK-NEXT: vadd.f16 q0, q0, q1
+; CHECK-NEXT: vadd.f16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
%src2 = load half, half *%src2p, align 2
; CHECK-LABEL: vaddqr_v4f32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
-; CHECK-NEXT: vdup.32 q1, r0
-; CHECK-NEXT: vadd.f32 q0, q1, q0
+; CHECK-NEXT: vadd.f32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
%src2bc = bitcast float %src2 to i32
; CHECK-LABEL: vaddqr_v8f16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r0, [r0]
-; CHECK-NEXT: vdup.16 q1, r0
-; CHECK-NEXT: vadd.f16 q0, q1, q0
+; CHECK-NEXT: vadd.f16 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
%src2 = load half, half *%src2p, align 2
; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vdup.16 q3, r0
-; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, q3
+; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry: