[RISCV] Prevent store combining from infinitely looping

author Fraser Cormack <fraser@codeplay.com>

Fri, 21 May 2021 12:00:19 +0000 (13:00 +0100)

committer Fraser Cormack <fraser@codeplay.com>

Mon, 24 May 2021 09:19:32 +0000 (10:19 +0100)
author Fraser Cormack <fraser@codeplay.com>
Fri, 21 May 2021 12:00:19 +0000 (13:00 +0100)
committer Fraser Cormack <fraser@codeplay.com>
Mon, 24 May 2021 09:19:32 +0000 (10:19 +0100)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp

index 0c219f7..e786d5e 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1144,6 +1144,13 @@ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    return {SubRegIdx, InsertExtractIdx};
  }
  
+// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
+// stores for those types.
+bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+  return !Subtarget.useRVVForFixedLengthVectors() ||
+         (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
+}
+
  static bool useRVVForFixedLengthVectorVT(MVT VT,
                                           const RISCVSubtarget &Subtarget) {
    assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
@@ -1372,8 +1379,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
        // If we have to use more than one INSERT_VECTOR_ELT then this
        // optimization is likely to increase code size; avoid peforming it in
-      // such a case. We can go through the stack as long as we're at least
-      // byte-sized.
+      // such a case. We can use a load from a constant pool in this case.
        if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
          return SDValue();
        // Now we can create our integer vector type. Note that it may be larger
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h

index 503a38d..24220f6 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -557,6 +557,14 @@ private:
        MachineFunction &MF) const;
  
    bool useRVVForFixedLengthVectorVT(MVT VT) const;
+
+  /// RVV code generation for fixed length vectors does not lower all
+  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
+  /// merge. However, merging them creates a BUILD_VECTOR that is just as
+  /// illegal as the original, thus leading to an infinite legalisation loop.
+  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
+  /// this override can be removed.
+  bool mergeStoresAfterLegalization(EVT VT) const override;
  };
  
  namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

index cf761b8..daff1de 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1,9 +1,9 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
  
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
  
  ; Tests that a floating-point build_vector doesn't try and generate a VID
  ; instruction
@@ -20,12 +20,81 @@ define void @buildvec_no_vid_v4f32(<4 x float>* %x) {
    ret void
  }
  
+; Not all BUILD_VECTORs are successfully lowered by the backend: some are
+; expanded into scalarized stack stores. However, this may result in an
+; infinite loop in the DAGCombiner which tries to recombine those stores into a
+; BUILD_VECTOR followed by a vector store. The BUILD_VECTOR is then expanded
+; and the loop begins.
+; Until all BUILD_VECTORs are lowered, we disable store-combining after
+; legalization for fixed-length vectors.
+; This test uses a trick with a shufflevector which can't be lowered to a
+; SHUFFLE_VECTOR node; the mask is shorter than the source vectors and the
+; shuffle indices aren't located within the same 4-element subvector, so is
+; expanded to 4 EXTRACT_VECTOR_ELTs and a BUILD_VECTOR. This then triggers the
+; loop when expanded.
+define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize {
+; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi sp, sp, -16
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
+; LMULMAX1-NEXT:    vsetvli zero, zero, e32,m2,ta,mu
+; LMULMAX1-NEXT:    vfmv.f.s ft0, v10
+; LMULMAX1-NEXT:    fsw ft0, 8(sp)
+; LMULMAX1-NEXT:    vfmv.f.s ft0, v8
+; LMULMAX1-NEXT:    fsw ft0, 0(sp)
+; LMULMAX1-NEXT:    vsetivli a0, 1, e32,m2,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v26, v10, 7
+; LMULMAX1-NEXT:    vfmv.f.s ft0, v26
+; LMULMAX1-NEXT:    fsw ft0, 12(sp)
+; LMULMAX1-NEXT:    vslidedown.vi v26, v8, 7
+; LMULMAX1-NEXT:    vfmv.f.s ft0, v26
+; LMULMAX1-NEXT:    fsw ft0, 4(sp)
+; LMULMAX1-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v8, (sp)
+; LMULMAX1-NEXT:    addi sp, sp, 16
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX2-LABEL: hang_when_merging_stores_after_legalization:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vmv.v.i v25, 0
+; LMULMAX2-NEXT:    vrgather.vv v26, v8, v25
+; LMULMAX2-NEXT:    addi a0, zero, 2
+; LMULMAX2-NEXT:    vsetivli a1, 1, e8,mf8,ta,mu
+; LMULMAX2-NEXT:    vmv.s.x v0, a0
+; LMULMAX2-NEXT:    lui a0, %hi(.LCPI1_0)
+; LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI1_0)
+; LMULMAX2-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vle32.v v27, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 4, e32,m1,tu,mu
+; LMULMAX2-NEXT:    vrgather.vv v26, v9, v27, v0.t
+; LMULMAX2-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vrgather.vv v27, v10, v25
+; LMULMAX2-NEXT:    addi a0, zero, 8
+; LMULMAX2-NEXT:    vsetivli a1, 1, e8,mf8,ta,mu
+; LMULMAX2-NEXT:    vmv.s.x v0, a0
+; LMULMAX2-NEXT:    lui a0, %hi(.LCPI1_1)
+; LMULMAX2-NEXT:    addi a0, a0, %lo(.LCPI1_1)
+; LMULMAX2-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vle32.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 4, e32,m1,tu,mu
+; LMULMAX2-NEXT:    vrgather.vv v27, v11, v25, v0.t
+; LMULMAX2-NEXT:    addi a0, zero, 3
+; LMULMAX2-NEXT:    vsetivli a1, 1, e8,mf8,ta,mu
+; LMULMAX2-NEXT:    vmv.s.x v0, a0
+; LMULMAX2-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vmerge.vvm v8, v27, v26, v0
+; LMULMAX2-NEXT:    ret
+  %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
+  ret <4 x float> %z
+}
+
  define void @buildvec_dominant0_v4f32(<4 x float>* %x) {
  ; CHECK-LABEL: buildvec_dominant0_v4f32:
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI1_0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
  ; CHECK-NEXT:    vlse32.v v25, (a1), zero
  ; CHECK-NEXT:    fmv.w.x ft0, zero
  ; CHECK-NEXT:    vfmv.s.f v26, ft0
@@ -61,8 +130,8 @@ define void @buildvec_dominant1_v4f32(<4 x float>* %x, float %f) {
  define void @buildvec_dominant2_v4f32(<4 x float>* %x, float %f) {
  ; CHECK-LABEL: buildvec_dominant2_v4f32:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    flw ft0, %lo(.LCPI3_0)(a1)
+; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI4_0)(a1)
  ; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
  ; CHECK-NEXT:    vfmv.s.f v25, ft0
  ; CHECK-NEXT:    vfmv.v.f v26, fa0
@@ -84,8 +153,8 @@ define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) {
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    addi a1, zero, 6
  ; CHECK-NEXT:    vsetivli a2, 1, e8,mf8,ta,mu
-; CHECK-NEXT:    lui a2, %hi(.LCPI4_0)
-; CHECK-NEXT:    flw ft0, %lo(.LCPI4_0)(a2)
+; CHECK-NEXT:    lui a2, %hi(.LCPI5_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI5_0)(a2)
  ; CHECK-NEXT:    vmv.s.x v0, a1
  ; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
  ; CHECK-NEXT:    vfmv.v.f v25, fa0
author	Fraser Cormack <fraser@codeplay.com>
	Fri, 21 May 2021 12:00:19 +0000 (13:00 +0100)
committer	Fraser Cormack <fraser@codeplay.com>
	Mon, 24 May 2021 09:19:32 +0000 (10:19 +0100)
llvm/lib/Target/RISCV/RISCVISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVISelLowering.h		patch \| blob \| history
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll		patch \| blob \| history