ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MSTORE);
return SDValue();
}
+// Fold trunc(dup x) --> dup(trunc x): build the splat directly at the
+// narrower element type so the separate vector-narrowing (XTN) disappears.
+static SDValue performTruncateCombine(SDNode *N,
+                                      SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  // Only fire on 64-bit fixed-width vectors where this truncate is the
+  // DUP's sole user; scalable vectors and multi-use DUPs are left alone.
+  if (VT.isFixedLengthVector() && VT.is64BitVector() && N0.hasOneUse() &&
+      N0.getOpcode() == AArch64ISD::DUP) {
+    SDValue Op = N0.getOperand(0);
+    // When narrowing to i32 elements from a DUP of an i64 scalar, the
+    // scalar operand itself must be truncated to i32 first so the new
+    // DUP's operand type matches its element type.
+    if (VT.getScalarType() == MVT::i32 &&
+        N0.getOperand(0).getValueType().getScalarType() == MVT::i64)
+      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i32, Op);
+    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Op);
+  }
+
+  return SDValue();
+}
+
// Check whether a node is an extend or shift operand
static bool isExtendOrShiftOperand(SDValue N) {
unsigned Opcode = N.getOpcode();
return performAddSubCombine(N, DCI, DAG);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
+ case ISD::TRUNCATE:
+ return performTruncateCombine(N, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: and x8, x0, #0xffff
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: xtn v2.2s, v2.2d
; CHECK-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-NEXT: ret
entry:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: dup v2.8b, w8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: xtn v2.8b, v2.8h
; CHECK-NEXT: umull v0.8h, v2.8b, v0.8b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: umull_and_v8i32_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: xtn v2.4h, v2.4s
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v1.4s, v0.8h, v2.8h
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
-; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i16> %src1 to <8 x i32>
define <4 x i64> @umull_and_v4i64_dup(<4 x i32> %src1, i64 %src2) {
; CHECK-LABEL: umull_and_v4i64_dup:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and x8, x0, #0xff
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: dup v2.2d, x8
-; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: dup v2.4s, w8
+; CHECK-NEXT: umull2 v1.2d, v0.4s, v2.4s
; CHECK-NEXT: umull v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
%in1 = zext <4 x i32> %src1 to <4 x i64>
; CHECK-LABEL: no_combine:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4h, #4
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: dup v1.4h, w0
; CHECK-NEXT: mov v1.d[1], v0.d[0]
; CHECK-NEXT: uzp1 v0.16b, v1.16b, v1.16b
; CHECK-NEXT: str q0, [x8]