From 457da7f298fe0081d2b0dde8f3ca1a472ea35754 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Wed, 14 Apr 2021 09:03:27 +0100
Subject: [PATCH] [SelectionDAG] Relax constraints on STEP_VECTOR step operand

This patch relaxes the requirement that the STEP_VECTOR step constant
must be of a type at least as large as the vector element type. That
requirement does not permit the use of STEP_VECTOR on targets which have
legal vector element types larger than the largest legal scalar type,
such as i64 vectors on RV32.

As such, the requirement has been loosened so that the step operand may
be of any scalar type, so long as the constant immediate is non-negative
and the value fits inside the vector element type.

This limits combining optimizations in certain circumstances but in
practice it's unlikely to be a hindrance.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D100660
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h             |  13 +-
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp     |   7 +-
 llvm/test/CodeGen/RISCV/rvv/stepvector-rv32.ll     | 233 ---------------------
 .../rvv/{stepvector-rv64.ll => stepvector.ll}      |  31 ++-
 4 files changed, 36 insertions(+), 248 deletions(-)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/stepvector-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{stepvector-rv64.ll => stepvector.ll} (91%)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index a633726..9a172a9 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -594,10 +594,15 @@ enum NodeType {
 
   /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
   /// of a linear sequence of unsigned values starting from 0 with a step of
-  /// IMM, where IMM must be a constant positive integer value. The operation
-  /// does not support returning fixed-width vectors or non-constant operands.
-  /// If the sequence value exceeds the limit allowed for the element type then
-  /// the values for those lanes are undefined.
+  /// IMM, where IMM must be a vector index constant holding a non-negative
+  /// integer value which must fit in the vector element type.
+  /// Note that IMM may be a smaller type than the vector element type, in
+  /// which case the step is implicitly zero-extended to the vector element
+  /// type. IMM may also be a larger type than the vector element type, in
+  /// which case the step is implicitly truncated to the vector element type.
+  /// The operation does not support returning fixed-width vectors or
+  /// non-constant operands. If the sequence value exceeds the limit allowed
+  /// for the element type then the values for those lanes are undefined.
   STEP_VECTOR,
 
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d244555..ba04823 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4706,11 +4706,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(VT.getScalarSizeInBits() >= 8 &&
            "STEP_VECTOR can only be used with vectors of integers that are at "
            "least 8 bits wide");
-    assert(Operand.getValueType().bitsGE(VT.getScalarType()) &&
-           "Operand type should be at least as large as the element type");
     assert(isa<ConstantSDNode>(Operand) &&
            cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
-           "Expected positive integer constant for STEP_VECTOR");
+           cast<ConstantSDNode>(Operand)->getAPIntValue().isSignedIntN(
+               VT.getScalarSizeInBits()) &&
+           "Expected STEP_VECTOR integer constant to be positive and fit in "
+           "the vector element type");
     break;
   case ISD::FREEZE:
     assert(VT == Operand.getValueType() && "Unexpected VT!");
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector-rv32.ll
deleted file mode 100644
index b11776d..0000000
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector-rv32.ll
+++ /dev/null
@@ -1,233 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-
-declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
-
-define <vscale x 1 x i8> @stepvector_nxv1i8() {
-; CHECK-LABEL: stepvector_nxv1i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
-  ret <vscale x 1 x i8> %v
-}
-
-declare <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
-
-define <vscale x 2 x i8> @stepvector_nxv2i8() {
-; CHECK-LABEL: stepvector_nxv2i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
-  ret <vscale x 2 x i8> %v
-}
-
-declare <vscale x 4 x i8> @llvm.experimental.stepvector.nxv4i8()
-
-define <vscale x 4 x i8> @stepvector_nxv4i8() {
-; CHECK-LABEL: stepvector_nxv4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 4 x i8> @llvm.experimental.stepvector.nxv4i8()
-  ret <vscale x 4 x i8> %v
-}
-
-declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
-
-define <vscale x 8 x i8> @stepvector_nxv8i8() {
-; CHECK-LABEL: stepvector_nxv8i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
-  ret <vscale x 8 x i8> %v
-}
-
-declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
-
-define <vscale x 16 x i8> @stepvector_nxv16i8() {
-; CHECK-LABEL: stepvector_nxv16i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
-  ret <vscale x 16 x i8> %v
-}
-
-declare <vscale x 32 x i8> @llvm.experimental.stepvector.nxv32i8()
-
-define <vscale x 32 x i8> @stepvector_nxv32i8() {
-; CHECK-LABEL: stepvector_nxv32i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 32 x i8> @llvm.experimental.stepvector.nxv32i8()
-  ret <vscale x 32 x i8> %v
-}
-
-declare <vscale x 64 x i8> @llvm.experimental.stepvector.nxv64i8()
-
-define <vscale x 64 x i8> @stepvector_nxv64i8() {
-; CHECK-LABEL: stepvector_nxv64i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 64 x i8> @llvm.experimental.stepvector.nxv64i8()
-  ret <vscale x 64 x i8> %v
-}
-
-declare <vscale x 1 x i16> @llvm.experimental.stepvector.nxv1i16()
-
-define <vscale x 1 x i16> @stepvector_nxv1i16() {
-; CHECK-LABEL: stepvector_nxv1i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 1 x i16> @llvm.experimental.stepvector.nxv1i16()
-  ret <vscale x 1 x i16> %v
-}
-
-declare <vscale x 2 x i16> @llvm.experimental.stepvector.nxv2i16()
-
-define <vscale x 2 x i16> @stepvector_nxv2i16() {
-; CHECK-LABEL: stepvector_nxv2i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 2 x i16> @llvm.experimental.stepvector.nxv2i16()
-  ret <vscale x 2 x i16> %v
-}
-
-declare <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
-
-define <vscale x 4 x i16> @stepvector_nxv4i16() {
-; CHECK-LABEL: stepvector_nxv4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
-  ret <vscale x 4 x i16> %v
-}
-
-declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
-
-define <vscale x 8 x i16> @stepvector_nxv8i16() {
-; CHECK-LABEL: stepvector_nxv8i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
-  ret <vscale x 8 x i16> %v
-}
-
-declare <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
-
-define <vscale x 16 x i16> @stepvector_nxv16i16() {
-; CHECK-LABEL: stepvector_nxv16i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
-  ret <vscale x 16 x i16> %v
-}
-
-declare <vscale x 32 x i16> @llvm.experimental.stepvector.nxv32i16()
-
-define <vscale x 32 x i16> @stepvector_nxv32i16() {
-; CHECK-LABEL: stepvector_nxv32i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 32 x i16> @llvm.experimental.stepvector.nxv32i16()
-  ret <vscale x 32 x i16> %v
-}
-
-declare <vscale x 1 x i32> @llvm.experimental.stepvector.nxv1i32()
-
-define <vscale x 1 x i32> @stepvector_nxv1i32() {
-; CHECK-LABEL: stepvector_nxv1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 1 x i32> @llvm.experimental.stepvector.nxv1i32()
-  ret <vscale x 1 x i32> %v
-}
-
-declare <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
-
-define <vscale x 2 x i32> @stepvector_nxv2i32() {
-; CHECK-LABEL: stepvector_nxv2i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
-  ret <vscale x 2 x i32> %v
-}
-
-declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
-
-define <vscale x 4 x i32> @stepvector_nxv4i32() {
-; CHECK-LABEL: stepvector_nxv4i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
-  ret <vscale x 4 x i32> %v
-}
-
-declare <vscale x 8 x i32> @llvm.experimental.stepvector.nxv8i32()
-
-define <vscale x 8 x i32> @stepvector_nxv8i32() {
-; CHECK-LABEL: stepvector_nxv8i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 8 x i32> @llvm.experimental.stepvector.nxv8i32()
-  ret <vscale x 8 x i32> %v
-}
-
-declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
-
-define <vscale x 16 x i32> @stepvector_nxv16i32() {
-; CHECK-LABEL: stepvector_nxv16i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-  %v = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
-  ret <vscale x 16 x i32> %v
-}
-
-declare <vscale x 32 x i32> @llvm.experimental.stepvector.nxv32i32()
-
-define <vscale x 32 x i32> @stepvector_nxv32i32() {
-; CHECK-LABEL: stepvector_nxv32i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vx v16, v8, a0
-; CHECK-NEXT:    ret
-  %v = call <vscale x 32 x i32> @llvm.experimental.stepvector.nxv32i32()
-  ret <vscale x 32 x i32> %v
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
similarity index 91%
rename from llvm/test/CodeGen/RISCV/rvv/stepvector-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/stepvector.ll
index dec25b5..8dea3db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
 
@@ -268,13 +271,25 @@ define <vscale x 8 x i64> @stepvector_nxv8i64() {
 declare <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
 
 define <vscale x 16 x i64> @stepvector_nxv16i64() {
-; CHECK-LABEL: stepvector_nxv16i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vx v16, v8, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: stepvector_nxv16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    addi a0, zero, 32
+; RV32-NEXT:    vsll.vx v8, v8, a0
+; RV32-NEXT:    vsrl.vx v16, v8, a0
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stepvector_nxv16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
   ret <vscale x 16 x i64> %v
 }
-- 
2.7.4