[RISCV] Don't use zero-stride vector load if there's no optimized u-arch

author wangpc <pc.wang@linux.alibaba.com>

Mon, 14 Nov 2022 05:50:51 +0000 (13:50 +0800)

committer wangpc <pc.wang@linux.alibaba.com>

Mon, 14 Nov 2022 05:51:30 +0000 (13:51 +0800)
author wangpc <pc.wang@linux.alibaba.com>
Mon, 14 Nov 2022 05:50:51 +0000 (13:50 +0800)
committer wangpc <pc.wang@linux.alibaba.com>
Mon, 14 Nov 2022 05:51:30 +0000 (13:51 +0800)
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td

index 3813769a73dc78b41d104381552b50189cf2df3d..d54384c25c0d521ce1433f862ec5d0de3b56fa7e 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -452,6 +452,11 @@ def FeatureUnalignedScalarMem
                        "true", "Has reasonably performant unaligned scalar "
                        "loads and stores">;
  
+def TuneNoOptimizedZeroStrideLoad
+   : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
+                      "false", "Hasn't optimized (perform fewer memory operations) "
+                      "zero-stride vector load">;
+
  def TuneLUIADDIFusion
      : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
                         "true", "Enable LUI+ADDI macrofusion">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

index 6e3a2a38b81e5e54721dbbba291ebe42de014d3d..16a0ca4c49560bded704acc8b311a5645736322d 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1790,6 +1790,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
    case RISCVISD::VFMV_S_F_VL:
    case RISCVISD::VMV_V_X_VL:
    case RISCVISD::VFMV_V_F_VL: {
+    // Only attempt this fold if the subtarget has an optimized zero-stride load.
+    if (!Subtarget->hasOptimizedZeroStrideLoad())
+      break;
+
      // Try to match splat of a scalar load to a strided load with stride of x0.
      bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                          Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h

index 456dc00999aba1a31ad0ba91cf819c0516bfa25f..f79f9b4bdd4e7d22b750fcbee827e8fbd6d398b8 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -101,6 +101,7 @@ private:
    bool HasShortForwardBranchOpt = false;
    bool HasLUIADDIFusion = false;
    bool HasForcedAtomics = false;
+  bool HasOptimizedZeroStrideLoad = true;
    unsigned XLen = 32;
    unsigned ZvlLen = 0;
    MVT XLenVT = MVT::i32;
@@ -199,6 +200,7 @@ public:
    bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
    bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
    bool hasForcedAtomics() const { return HasForcedAtomics; }
+  bool hasOptimizedZeroStrideLoad() const { return HasOptimizedZeroStrideLoad; }
    MVT getXLenVT() const { return XLenVT; }
    unsigned getXLen() const { return XLen; }
    unsigned getFLen() const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll

index 81fc7329be16263e27543ae446f4e73eb2c9b90a..879d06cfee1f3705a5fadd6138e1d259eaa3113e 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -1,8 +1,12 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
  ; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
  ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefixes=CHECK,OPTIMIZED
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-zvfh,+v,+no-optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED
  
  define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
  ; CHECK-LABEL: vsplat_nxv8f16:
@@ -72,11 +76,18 @@ define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
  
  ; Test that we fold this to a vlse with 0 stride.
  define <vscale x 8 x float> @vsplat_load_nxv8f32(float* %ptr) {
-; CHECK-LABEL: vsplat_load_nxv8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vlse32.v v8, (a0), zero
-; CHECK-NEXT:    ret
+; OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; OPTIMIZED:       # %bb.0:
+; OPTIMIZED-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; OPTIMIZED-NEXT:    vlse32.v v8, (a0), zero
+; OPTIMIZED-NEXT:    ret
+;
+; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32:
+; NOT-OPTIMIZED:       # %bb.0:
+; NOT-OPTIMIZED-NEXT:    flw ft0, 0(a0)
+; NOT-OPTIMIZED-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; NOT-OPTIMIZED-NEXT:    vfmv.v.f v8, ft0
+; NOT-OPTIMIZED-NEXT:    ret
    %f = load float, float* %ptr
    %head = insertelement <vscale x 8 x float> poison, float %f, i32 0
    %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
author	wangpc <pc.wang@linux.alibaba.com>
	Mon, 14 Nov 2022 05:50:51 +0000 (13:50 +0800)
committer	wangpc <pc.wang@linux.alibaba.com>
	Mon, 14 Nov 2022 05:51:30 +0000 (13:51 +0800)
llvm/lib/Target/RISCV/RISCV.td		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVSubtarget.h		patch \| blob \| history
llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll		patch \| blob \| history