From 5f2145adf9e343e950935cac1c9030f8ca597912 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 5 Apr 2023 13:35:08 +0100
Subject: [PATCH] [RISCV] Use non-strided load if VL=1 for optimized zero stride loads

When optimizing vmv.s.x/vmv.v.x splats of scalar loads, if VL is known
to be 1 then we don't need to perform a load with a stride of x0 and
can instead emit a regular unit-stride load.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D147609
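To illustrate the effect (before/after lifted from the test updates in
this patch, e.g. the VL=1 splat in buildvec_mask_v64i1):

  Before:
    vsetivli zero, 1, e64, m1, ta, ma
    vlse64.v v0, (a0), zero

  After:
    vsetivli zero, 1, e64, m1, ta, ma
    vle64.v v0, (a0)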
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 21 +++++++++++++--------
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll  | 12 ++++++------
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  | 10 +++++-----
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9e351b6..8c303a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2055,10 +2055,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
   case RISCVISD::VFMV_S_F_VL:
   case RISCVISD::VMV_V_X_VL:
   case RISCVISD::VFMV_V_F_VL: {
-    // Only if we have optimized zero-stride vector load.
-    if (!Subtarget->hasOptimizedZeroStrideLoad())
-      break;
-
     // Try to match splat of a scalar load to a strided load with stride of x0.
     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
@@ -2089,13 +2085,22 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
 
-    SDValue Operands[] = {Ld->getBasePtr(),
-                          CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
-                          Ld->getChain()};
+    // If VL=1, then we don't need to do a strided load and can just do a
+    // regular load.
+    bool IsStrided = !isOneConstant(VL);
+
+    // Only do a strided load if we have optimized zero-stride vector load.
+    if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
+      break;
+
+    SmallVector<SDValue> Operands = {Ld->getBasePtr()};
+    if (IsStrided)
+      Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
+    Operands.append({VL, SEW, Ld->getChain()});
 
     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
-        /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
+        /*IsMasked*/ false, /*IsTU*/ false, IsStrided, /*FF*/ false,
         Log2SEW, static_cast<unsigned>(LMUL));
     MachineSDNode *Load =
         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 6b7f581..27e9918 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -671,7 +671,7 @@ define <64 x i1> @buildvec_mask_v64i1() {
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI19_0)
 ; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI19_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1:
@@ -691,7 +691,7 @@ define <64 x i1> @buildvec_mask_v64i1() {
 ; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI19_0)
 ; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI19_0)
 ; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX8-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX8-NEXT:    vle64.v v0, (a0)
 ; RV64-LMULMAX8-NEXT:    ret
   ret <64 x i1>
 }
@@ -799,10 +799,10 @@ define <128 x i1> @buildvec_mask_v128i1() {
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_0)
 ; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_1)
 ; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_1)
-; RV64-LMULMAX4-NEXT:    vlse64.v v8, (a0), zero
+; RV64-LMULMAX4-NEXT:    vle64.v v8, (a0)
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
@@ -938,10 +938,10 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_0)
 ; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT:    vle64.v v0, (a0)
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_1)
 ; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_1)
-; RV64-LMULMAX4-NEXT:    vlse64.v v8, (a0), zero
+; RV64-LMULMAX4-NEXT:    vle64.v v8, (a0)
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 293e5e2..f99a5a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -39,7 +39,7 @@ define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru)
 ; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vlse8.v v8, (a0), zero
+; RV64ZVE32F-NEXT:    vle8.v v8, (a0)
 ; RV64ZVE32F-NEXT:  .LBB0_2: # %else
 ; RV64ZVE32F-NEXT:    ret
   %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
@@ -876,7 +876,7 @@ define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthr
 ; RV64ZVE32F-NEXT:    bnez a1, .LBB13_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT:    vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
 ; RV64ZVE32F-NEXT:  .LBB13_2: # %else
 ; RV64ZVE32F-NEXT:    ret
   %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
@@ -2079,7 +2079,7 @@ define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthr
 ; RV64ZVE32F-NEXT:    bnez a1, .LBB27_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
 ; RV64ZVE32F-NEXT:  .LBB27_2: # %else
 ; RV64ZVE32F-NEXT:    ret
   %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
@@ -7152,7 +7152,7 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
 ; RV64ZVE32F-NEXT:    bnez a1, .LBB58_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT:    vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
 ; RV64ZVE32F-NEXT:  .LBB58_2: # %else
 ; RV64ZVE32F-NEXT:    ret
   %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
@@ -8119,7 +8119,7 @@ define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %pas
 ; RV64ZVE32F-NEXT:    bnez a1, .LBB68_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
 ; RV64ZVE32F-NEXT:  .LBB68_2: # %else
 ; RV64ZVE32F-NEXT:    ret
   %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
-- 
2.7.4