This prevents undefs from being expanded to a build_vector of zeroes, as noted by @craig.topper in D153399.
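
A minimal sketch of the kind of input affected (the function below is illustrative only and is not one of the updated tests): a fixed-length vector undef that reaches instruction selection now stays undef in its scalable container instead of being legalized into a zero splat.

  define <4 x i32> @ret_fixed_undef() {
    ; Previously this undef was expanded to a build_vector of 0s; with the
    ; custom lowering it becomes an undef of the scalable container type.
    ret <4 x i32> undef
  }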
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D153411
OtherVT, VT, Expand);
}
+ // Custom lower fixed vector undefs to scalable vector undefs to avoid
+ // expansion to a build_vector of 0s.
+ setOperationAction(ISD::UNDEF, VT, Custom);
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
setTruncStoreAction(VT, OtherVT, Expand);
}
+ // Custom lower fixed vector undefs to scalable vector undefs to avoid
+ // expansion to a build_vector of 0s.
+ setOperationAction(ISD::UNDEF, VT, Custom);
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
return lowerVPREDUCE(Op, DAG);
+ case ISD::UNDEF: {
+ MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
+ return convertFromScalableVector(Op.getSimpleValueType(),
+ DAG.getUNDEF(ContainerVT), DAG, Subtarget);
+ }
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
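
The test diffs below show the effect: the zero-splat plus tail-undisturbed merge sequences (vmv.v.i of 0 followed by vmv.v.v) that previously materialized undef lanes are no longer emitted. As a rough sketch of the pattern the first test (widen_splat_ve3) exercises, with a hypothetical function name: widening the shuffle operand implicitly pads it with an undef fixed-length vector, which now stays undef.

  define <8 x i8> @widen_splat_ve3_sketch(<4 x i8> %v) {
    ; The high lanes come from the poison operand; that padding no longer
    ; requires materializing a zero vector before the vrgather.
    %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    ret <8 x i8> %shuf
  }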
; CHECK-LABEL: widen_splat_ve3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vrgather.vi v8, v9, 3
+; CHECK-NEXT: vrgather.vi v9, v8, 3
+; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x i8> %shuf
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: andi a3, a2, 1
+; RV32-NEXT: # implicit-def: $v8
; RV32-NEXT: beqz a3, .LBB8_2
; RV32-NEXT: # %bb.1: # %cond.load
; RV32-NEXT: lbu a3, 1(a0)
; RV32-NEXT: or a3, a4, a3
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
-; RV32-NEXT: andi a2, a2, 2
-; RV32-NEXT: bnez a2, .LBB8_3
-; RV32-NEXT: j .LBB8_4
-; RV32-NEXT: .LBB8_2:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: .LBB8_2: # %else
; RV32-NEXT: andi a2, a2, 2
; RV32-NEXT: beqz a2, .LBB8_4
-; RV32-NEXT: .LBB8_3: # %cond.load1
+; RV32-NEXT: # %bb.3: # %cond.load1
; RV32-NEXT: lbu a2, 5(a0)
; RV32-NEXT: lbu a3, 4(a0)
; RV32-NEXT: lbu a4, 6(a0)
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: or a0, a0, a4
; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: .LBB8_4: # %else2
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: ret
;
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: andi a3, a2, 1
+; RV64-NEXT: # implicit-def: $v8
; RV64-NEXT: beqz a3, .LBB8_2
; RV64-NEXT: # %bb.1: # %cond.load
; RV64-NEXT: lbu a3, 1(a0)
; RV64-NEXT: or a3, a4, a3
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vmv.v.x v8, a3
-; RV64-NEXT: andi a2, a2, 2
-; RV64-NEXT: bnez a2, .LBB8_3
-; RV64-NEXT: j .LBB8_4
-; RV64-NEXT: .LBB8_2:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: .LBB8_2: # %else
; RV64-NEXT: andi a2, a2, 2
; RV64-NEXT: beqz a2, .LBB8_4
-; RV64-NEXT: .LBB8_3: # %cond.load1
+; RV64-NEXT: # %bb.3: # %cond.load1
; RV64-NEXT: lbu a2, 5(a0)
; RV64-NEXT: lbu a3, 4(a0)
; RV64-NEXT: lbu a4, 6(a0)
; RV64-NEXT: slli a0, a0, 24
; RV64-NEXT: or a0, a0, a4
; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vslideup.vi v8, v9, 1
; RV64-NEXT: .LBB8_4: # %else2
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vse32.v v8, (a1)
; RV64-NEXT: ret
%mask = icmp eq <2 x i32> %m, zeroinitializer
; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vle16.v v12, (a0)
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vsetivli zero, 16, e16, m4, tu, ma
-; CHECK-NEXT: vmv.v.v v16, v8
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vrgather.vv v8, v16, v12
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%v32f16 = shufflevector <16 x half> %a, <16 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x half> %v32f16
define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; CHECK-LABEL: vector_interleave_v32i1_v16i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vmclr.m v9
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
-; CHECK-NEXT: vmv.v.v v9, v0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vslideup.vi v0, v8, 2
+; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; RV32-LABEL: vector_interleave_v4i64_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; RV32-NEXT: vmv.v.v v10, v8
+; RV32-NEXT: vmv1r.v v10, v9
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: lui a0, %hi(.LCPI3_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI3_0)
; RV32-NEXT: vle16.v v12, (a0)
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: vslideup.vi v10, v8, 2
-; RV32-NEXT: vrgatherei16.vv v8, v10, v12
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vector_interleave_v4i64_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; RV64-NEXT: vmv.v.v v10, v8
+; RV64-NEXT: vmv1r.v v10, v9
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: lui a0, %hi(.LCPI3_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI3_0)
; RV64-NEXT: vle64.v v12, (a0)
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: vslideup.vi v10, v8, 2
-; RV64-NEXT: vrgather.vv v8, v10, v12
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
ret <4 x i64> %res
define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
; RV32-LABEL: vector_interleave_v4f64_v2f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; RV32-NEXT: vmv.v.v v10, v8
+; RV32-NEXT: vmv1r.v v10, v9
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: lui a0, %hi(.LCPI9_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI9_0)
; RV32-NEXT: vle16.v v12, (a0)
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: vslideup.vi v10, v8, 2
-; RV32-NEXT: vrgatherei16.vv v8, v10, v12
+; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vector_interleave_v4f64_v2f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma
-; RV64-NEXT: vmv.v.v v10, v8
+; RV64-NEXT: vmv1r.v v10, v9
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: lui a0, %hi(.LCPI9_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI9_0)
; RV64-NEXT: vle64.v v12, (a0)
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: vslideup.vi v10, v8, 2
-; RV64-NEXT: vrgather.vv v8, v10, v12
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
ret <4 x double> %res