From 485d25007a2faaa2e3deec1e9fafb7dd1c8e4536 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 21 Jun 2023 13:13:40 +0100
Subject: [PATCH] [RISCV] Custom lower fixed vector undef to scalable undef

This avoids undefs being expanded to a build vector of zeroes, as noted
by @craig.topper in D153399.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D153411
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp        | 13 +++++
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll        |  7 +--
 .../CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll   | 24 ++++-----
 llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll     |  9 ++--
 .../CodeGen/RISCV/rvv/vector-interleave-fixed.ll   | 57 ++++++++--------------
 5 files changed, 48 insertions(+), 62 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5fab317..fa0db23 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -891,6 +891,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            OtherVT, VT, Expand);
       }
 
+      // Custom lower fixed vector undefs to scalable vector undefs to avoid
+      // expansion to a build_vector of 0s.
+      setOperationAction(ISD::UNDEF, VT, Custom);
+
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
       setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                          Custom);
@@ -1021,6 +1025,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setTruncStoreAction(VT, OtherVT, Expand);
       }
 
+      // Custom lower fixed vector undefs to scalable vector undefs to avoid
+      // expansion to a build_vector of 0s.
+      setOperationAction(ISD::UNDEF, VT, Custom);
+
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
       setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                          Custom);
@@ -4956,6 +4964,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
     return lowerVPREDUCE(Op, DAG);
+  case ISD::UNDEF: {
+    MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
+    return convertFromScalableVector(Op.getSimpleValueType(),
+                                     DAG.getUNDEF(ContainerVT), DAG, Subtarget);
+  }
   case ISD::INSERT_SUBVECTOR:
     return lowerINSERT_SUBVECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index dd3f399..40412ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -532,11 +532,8 @@ define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
 ; CHECK-LABEL: widen_splat_ve3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vmv.v.v v9, v8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vrgather.vi v8, v9, 3
+; CHECK-NEXT:    vrgather.vi v9, v8, 3
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   ret <8 x i8> %shuf
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 863544e..fcc7cf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -415,6 +415,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vmv.x.s a2, v8
 ; RV32-NEXT:    andi a3, a2, 1
+; RV32-NEXT:    # implicit-def: $v8
 ; RV32-NEXT:    beqz a3, .LBB8_2
 ; RV32-NEXT:  # %bb.1: # %cond.load
 ; RV32-NEXT:    lbu a3, 1(a0)
@@ -429,15 +430,10 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-NEXT:    or a3, a4, a3
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a3
-; RV32-NEXT:    andi a2, a2, 2
-; RV32-NEXT:    bnez a2, .LBB8_3
-; RV32-NEXT:    j .LBB8_4
-; RV32-NEXT:  .LBB8_2:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:  .LBB8_2: # %else
 ; RV32-NEXT:    andi a2, a2, 2
 ; RV32-NEXT:    beqz a2, .LBB8_4
-; RV32-NEXT:  .LBB8_3: # %cond.load1
+; RV32-NEXT:  # %bb.3: # %cond.load1
 ; RV32-NEXT:    lbu a2, 5(a0)
 ; RV32-NEXT:    lbu a3, 4(a0)
 ; RV32-NEXT:    lbu a4, 6(a0)
@@ -448,9 +444,11 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV32-NEXT:    slli a0, a0, 24
 ; RV32-NEXT:    or a0, a0, a4
 ; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-NEXT:    vmv.s.x v9, a0
 ; RV32-NEXT:    vslideup.vi v8, v9, 1
 ; RV32-NEXT:  .LBB8_4: # %else2
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    ret
 ;
@@ -461,6 +459,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vmv.x.s a2, v8
 ; RV64-NEXT:    andi a3, a2, 1
+; RV64-NEXT:    # implicit-def: $v8
 ; RV64-NEXT:    beqz a3, .LBB8_2
 ; RV64-NEXT:  # %bb.1: # %cond.load
 ; RV64-NEXT:    lbu a3, 1(a0)
@@ -475,15 +474,10 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-NEXT:    or a3, a4, a3
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a3
-; RV64-NEXT:    andi a2, a2, 2
-; RV64-NEXT:    bnez a2, .LBB8_3
-; RV64-NEXT:    j .LBB8_4
-; RV64-NEXT:  .LBB8_2:
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:  .LBB8_2: # %else
 ; RV64-NEXT:    andi a2, a2, 2
 ; RV64-NEXT:    beqz a2, .LBB8_4
-; RV64-NEXT:  .LBB8_3: # %cond.load1
+; RV64-NEXT:  # %bb.3: # %cond.load1
 ; RV64-NEXT:    lbu a2, 5(a0)
 ; RV64-NEXT:    lbu a3, 4(a0)
 ; RV64-NEXT:    lbu a4, 6(a0)
@@ -494,9 +488,11 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
 ; RV64-NEXT:    slli a0, a0, 24
 ; RV64-NEXT:    or a0, a0, a4
 ; RV64-NEXT:    or a0, a0, a2
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-NEXT:    vmv.s.x v9, a0
 ; RV64-NEXT:    vslideup.vi v8, v9, 1
 ; RV64-NEXT:  .LBB8_4: # %else2
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a1)
 ; RV64-NEXT:    ret
   %mask = icmp eq <2 x i32> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 72c3d4b..9f5c1c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -636,12 +636,9 @@ define <32 x half> @v16f16_2(<16 x half> %a) {
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI35_0)
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vsetivli zero, 16, e16, m4, tu, ma
-; CHECK-NEXT:    vmv.v.v v16, v8
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vrgather.vv v8, v16, v12
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vrgather.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %v32f16 = shufflevector <16 x half> %a, <16 x half> undef, <32 x i32>
   ret <32 x half> %v32f16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index c34f31c..18ced70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -7,17 +7,12 @@
 define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
 ; CHECK-LABEL: vector_interleave_v32i1_v16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmclr.m v9
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
-; CHECK-NEXT:    vmv.v.v v9, v0
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 2
+; CHECK-NEXT:    vslideup.vi v0, v8, 2
+; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 16
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
@@ -60,32 +55,26 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; RV32-LABEL: vector_interleave_v4i64_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; RV32-NEXT:    vmv.v.v v10, v8
+; RV32-NEXT:    vmv1r.v v10, v9
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    lui a0, %hi(.LCPI3_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI3_0)
 ; RV32-NEXT:    vle16.v v12, (a0)
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    vslideup.vi v10, v8, 2
-; RV32-NEXT:    vrgatherei16.vv v8, v10, v12
+; RV32-NEXT:    vslideup.vi v8, v10, 2
+; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
+; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_interleave_v4i64_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; RV64-NEXT:    vmv.v.v v10, v8
+; RV64-NEXT:    vmv1r.v v10, v9
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
 ; RV64-NEXT:    addi a0, a0, %lo(.LCPI3_0)
 ; RV64-NEXT:    vle64.v v12, (a0)
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    vslideup.vi v10, v8, 2
-; RV64-NEXT:    vrgather.vv v8, v10, v12
+; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    vrgather.vv v10, v8, v12
+; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
   ret <4 x i64> %res
@@ -166,32 +155,26 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
 define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
 ; RV32-LABEL: vector_interleave_v4f64_v2f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; RV32-NEXT:    vmv.v.v v10, v8
+; RV32-NEXT:    vmv1r.v v10, v9
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    lui a0, %hi(.LCPI9_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI9_0)
 ; RV32-NEXT:    vle16.v v12, (a0)
-; RV32-NEXT:    vmv1r.v v8, v9
-; RV32-NEXT:    vslideup.vi v10, v8, 2
-; RV32-NEXT:    vrgatherei16.vv v8, v10, v12
+; RV32-NEXT:    vslideup.vi v8, v10, 2
+; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
+; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_interleave_v4f64_v2f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vsetivli zero, 2, e64, m2, tu, ma
-; RV64-NEXT:    vmv.v.v v10, v8
+; RV64-NEXT:    vmv1r.v v10, v9
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    lui a0, %hi(.LCPI9_0)
 ; RV64-NEXT:    addi a0, a0, %lo(.LCPI9_0)
 ; RV64-NEXT:    vle64.v v12, (a0)
-; RV64-NEXT:    vmv1r.v v8, v9
-; RV64-NEXT:    vslideup.vi v10, v8, 2
-; RV64-NEXT:    vrgather.vv v8, v10, v12
+; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    vrgather.vv v10, v8, v12
+; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
   ret <4 x double> %res
-- 
2.7.4
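
For local verification, a minimal reproducer sketch in the spirit of the tests above; the function name and shuffle mask are illustrative assumptions and are not part of the patch. A widening shuffle like this forces the narrow source to be combined with a fixed-vector undef during selection, which before this change could be materialized as a build_vector of zeroes (a vmv.v.i of 0); with the change it should lower to a scalable-vector undef instead. The exact assembly depends on the target attributes and may differ.

; Hypothetical reproducer, not taken from the patch. Run with something like:
;   llc -mtriple=riscv64 -mattr=+v reproducer.ll -o -
define <8 x i8> @widen_shuffle_undef_half(<4 x i8> %v) {
  ; All eight result lanes come from %v, so the second (undef) operand of the
  ; widened shuffle is never read and should not need to be materialized.
  %s = shufflevector <4 x i8> %v, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %s
}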