--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+; Test that limiting ELEN scalarizes element types wider than the limit and
+; disables some fractional LMULs.
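+;
+; -riscv-v-vector-bits-min=128 enables fixed-length vector lowering assuming
+; VLEN is at least 128, and -riscv-v-fixed-length-vector-elen-max=32 caps the
+; vector element width at 32 bits, so i64 and double operations cannot stay in
+; vector registers and are scalarized instead.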
+
+; This should use LMUL=1.
+define void @add_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: add_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %c = add <4 x i32> %a, %b
+ store <4 x i32> %c, <4 x i32>* %x
+ ret void
+}
+
+; i64 vectors should be scalarized.
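+; On RV32 each i64 lane is split into two 32-bit adds: the sltu computes the
+; carry out of the low-word add, which is then folded into the high word.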
+define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; RV32-LABEL: add_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 8(a0)
+; RV32-NEXT: lw a6, 12(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a7, 4(a0)
+; RV32-NEXT: lw a3, 4(a1)
+; RV32-NEXT: lw a5, 0(a1)
+; RV32-NEXT: lw t0, 8(a1)
+; RV32-NEXT: lw a1, 12(a1)
+; RV32-NEXT: add a3, a7, a3
+; RV32-NEXT: add a5, a4, a5
+; RV32-NEXT: sltu a4, a5, a4
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: add a1, a6, a1
+; RV32-NEXT: add a4, a2, t0
+; RV32-NEXT: sltu a2, a4, a2
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sw a4, 8(a0)
+; RV32-NEXT: sw a5, 0(a0)
+; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: sw a3, 4(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: ld a2, 8(a0)
+; RV64-NEXT: ld a3, 0(a0)
+; RV64-NEXT: ld a4, 0(a1)
+; RV64-NEXT: ld a1, 8(a1)
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: sd a1, 8(a0)
+; RV64-NEXT: sd a3, 0(a0)
+; RV64-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = load <2 x i64>, <2 x i64>* %y
+ %c = add <2 x i64> %a, %b
+ store <2 x i64> %c, <2 x i64>* %x
+ ret void
+}
+
+; This should use LMUL=1 because there are no fractional i32 LMULs with ELEN=32.
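+; (A fractional LMUL only has to support SEW <= LMUL * ELEN, so with ELEN=32
+; an e32 operation cannot use mf2 and uses m1 instead.)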
+define void @add_v2i32(<2 x i32>* %x, <2 x i32>* %y) {
+; CHECK-LABEL: add_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load <2 x i32>, <2 x i32>* %y
+ %c = add <2 x i32> %a, %b
+ store <2 x i32> %c, <2 x i32>* %x
+ ret void
+}
+
+; i64 vectors should be scalarized.
+define void @add_v1i64(<1 x i64>* %x, <1 x i64>* %y) {
+; RV32-LABEL: add_v1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 0(a0)
+; RV32-NEXT: lw a3, 4(a0)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: sltu a2, a1, a2
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw a2, 4(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_v1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: ld a2, 0(a0)
+; RV64-NEXT: ld a1, 0(a1)
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: sd a1, 0(a0)
+; RV64-NEXT: ret
+ %a = load <1 x i64>, <1 x i64>* %x
+ %b = load <1 x i64>, <1 x i64>* %y
+ %c = add <1 x i64> %a, %b
+ store <1 x i64> %c, <1 x i64>* %x
+ ret void
+}
+
+; This should use LMUL=1.
+define void @fadd_v4f32(<4 x float>* %x, <4 x float>* %y) {
+; CHECK-LABEL: fadd_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vfadd.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x float>, <4 x float>* %x
+ %b = load <4 x float>, <4 x float>* %y
+ %c = fadd <4 x float> %a, %b
+ store <4 x float> %c, <4 x float>* %x
+ ret void
+}
+
+; double vectors should be scalarized.
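+; With +d available, each lane goes through the scalar FP instructions
+; (fld/fadd.d/fsd) rather than a vector op.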
+define void @fadd_v2f64(<2 x double>* %x, <2 x double>* %y) {
+; CHECK-LABEL: fadd_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld ft0, 8(a0)
+; CHECK-NEXT: fld ft1, 0(a0)
+; CHECK-NEXT: fld ft2, 0(a1)
+; CHECK-NEXT: fld ft3, 8(a1)
+; CHECK-NEXT: fadd.d ft1, ft1, ft2
+; CHECK-NEXT: fadd.d ft0, ft0, ft3
+; CHECK-NEXT: fsd ft0, 8(a0)
+; CHECK-NEXT: fsd ft1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x double>, <2 x double>* %x
+ %b = load <2 x double>, <2 x double>* %y
+ %c = fadd <2 x double> %a, %b
+ store <2 x double> %c, <2 x double>* %x
+ ret void
+}
+
+; This should use LMUL=1 because there are no fractional float LMULs with ELEN=32.
+define void @fadd_v2f32(<2 x float>* %x, <2 x float>* %y) {
+; CHECK-LABEL: fadd_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vfadd.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x float>, <2 x float>* %x
+ %b = load <2 x float>, <2 x float>* %y
+ %c = fadd <2 x float> %a, %b
+ store <2 x float> %c, <2 x float>* %x
+ ret void
+}
+
+; double vectors should be scalarized.
+define void @fadd_v1f64(<1 x double>* %x, <1 x double>* %y) {
+; CHECK-LABEL: fadd_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld ft0, 0(a0)
+; CHECK-NEXT: fld ft1, 0(a1)
+; CHECK-NEXT: fadd.d ft0, ft0, ft1
+; CHECK-NEXT: fsd ft0, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <1 x double>, <1 x double>* %x
+ %b = load <1 x double>, <1 x double>* %y
+ %c = fadd <1 x double> %a, %b
+ store <1 x double> %c, <1 x double>* %x
+ ret void
+}