case RISCVISD::VMV_S_X_VL:
case RISCVISD::VFMV_S_F_VL:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
- // Only if we have optimized zero-stride vector load.
- if (!Subtarget->hasOptimizedZeroStrideLoad())
- break;
-
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
- SDValue Operands[] = {Ld->getBasePtr(),
- CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
- Ld->getChain()};
+ // If VL=1, then we don't need to do a strided load and can just do a
+ // regular load.
+ bool IsStrided = !isOneConstant(VL);
+
+ // Only do a strided load if we have an optimized zero-stride vector load.
+ if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
+ break;
+
+ SmallVector<SDValue> Operands = {Ld->getBasePtr()};
+ if (IsStrided)
+ Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
+ Operands.append({VL, SEW, Ld->getChain()});
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
- /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
+ /*IsMasked*/ false, /*IsTU*/ false, IsStrided, /*FF*/ false,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
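The effect of the change is visible in the test updates that follow: wherever a splatted scalar load is selected with VL=1, the zero-strided vlse becomes a plain unit-stride vle. As a rough, self-contained illustration (the function name and the explicit declare are assumptions; the intrinsic call is taken verbatim from the first gather test below), the one-element masked gather is scalarized on the RV64ZVE32F configuration into a conditional element load whose VL=1 insert now selects a regular load:

declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)

define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
  ; Scalarized by the backend: the cond.load block loads one element and
  ; inserts it with VL=1, which is the pattern handled above.
  %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
  ret <1 x i8> %v
}
; cond.load before: vlse8.v v8, (a0), zero   (strided load, stride x0)
; cond.load after:  vle8.v  v8, (a0)         (unit-stride load)

The same pattern accounts for the vlse64.v -> vle64.v changes in the buildvec_mask tests, where the mask constant is splatted from a constant-pool load with VL=1.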
; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI19_0)
; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI19_0)
; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT: vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT: vle64.v v0, (a0)
; RV64-LMULMAX4-NEXT: ret
;
; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1:
; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI19_0)
; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI19_0)
; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX8-NEXT: vlse64.v v0, (a0), zero
+; RV64-LMULMAX8-NEXT: vle64.v v0, (a0)
; RV64-LMULMAX8-NEXT: ret
ret <64 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
}
; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_0)
; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI20_0)
; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT: vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT: vle64.v v0, (a0)
; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_1)
; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI20_1)
-; RV64-LMULMAX4-NEXT: vlse64.v v8, (a0), zero
+; RV64-LMULMAX4-NEXT: vle64.v v8, (a0)
; RV64-LMULMAX4-NEXT: ret
;
; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_0)
; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_0)
; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-LMULMAX4-NEXT: vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT: vle64.v v0, (a0)
; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_1)
; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_1)
-; RV64-LMULMAX4-NEXT: vlse64.v v8, (a0), zero
+; RV64-LMULMAX4-NEXT: vle64.v v8, (a0)
; RV64-LMULMAX4-NEXT: ret
;
; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB27_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)