[IR] Change vector.splice intrinsic to reject out-of-bounds indices

author David Sherwood <david.sherwood@arm.com>

Fri, 17 Dec 2021 09:39:21 +0000 (09:39 +0000)

committer David Sherwood <david.sherwood@arm.com>

Tue, 11 Jan 2022 09:37:39 +0000 (09:37 +0000)
author David Sherwood <david.sherwood@arm.com>
Fri, 17 Dec 2021 09:39:21 +0000 (09:39 +0000)
committer David Sherwood <david.sherwood@arm.com>
Tue, 11 Jan 2022 09:37:39 +0000 (09:37 +0000)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst

index d8fd7da..342b79b 100644 (file)
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17220,10 +17220,11 @@ For example:
  Arguments:
  """"""""""
  
-The first two operands are vectors with the same type. The third argument
-``imm`` is the start index, modulo VL, where VL is the runtime vector length of
-the source/result vector. The ``imm`` is a signed integer constant in the range
-``-VL <= imm < VL``. For values outside of this range the result is poison.
+The first two operands are vectors with the same type. The third operand,
+``imm``, is a signed integer constant; the start index is ``imm`` modulo the
+runtime number of elements in the source vector. ``imm`` must lie in the range
+``-X <= imm < X``, where ``X`` is ``N`` for a fixed-width vector ``<N x eltty>``
+and ``vscale_range_min * N`` for a scalable vector ``<vscale x N x eltty>``.
  
  '``llvm.experimental.stepvector``' Intrinsic
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index ff482f3..9674a88 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11245,12 +11245,6 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
  
    unsigned NumElts = VT.getVectorNumElements();
  
-  if ((-Imm > NumElts) || (Imm >= NumElts)) {
-    // Result is undefined if immediate is out-of-bounds.
-    setValue(&I, DAG.getUNDEF(VT));
-    return;
-  }
-
    uint64_t Idx = (NumElts + Imm) % NumElts;
  
    // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp

index 0e03105..fecbfc7 100644 (file)
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5352,6 +5352,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
  
      break;
    }
+  case Intrinsic::experimental_vector_splice: {
+    // Reject splice immediates outside [-VL, VL-1], where VL is the number of
+    // elements the vector is known to have at minimum.
+    VectorType *VecTy = cast<VectorType>(Call.getType());
+    int64_t Idx = cast<ConstantInt>(Call.getArgOperand(2))->getSExtValue();
+    ElementCount EC = VecTy->getElementCount();
+    int64_t KnownMinNumElements = EC.getKnownMinValue();
+    // Only scalable vectors grow with vscale, so only then may the function's
+    // vscale_range minimum widen the bound; a fixed-width <N x eltty> keeps the
+    // documented range -N <= imm < N. The parent checks skip the attribute
+    // lookup for calls that are not attached to a function.
+    if (EC.isScalable() && Call.getParent() && Call.getParent()->getParent()) {
+      AttributeList Attrs = Call.getParent()->getParent()->getAttributes();
+      if (Attrs.hasFnAttr(Attribute::VScaleRange))
+        KnownMinNumElements *= Attrs.getFnAttrs().getVScaleRangeMin();
+    }
+    Assert(Idx >= -KnownMinNumElements && Idx < KnownMinNumElements,
+           "The splice index exceeds the range [-VL, VL-1] where VL is the "
+           "known minimum number of elements in the vector. For scalable "
+           "vectors the minimum number of elements is determined from "
+           "vscale_range.",
+           &Call);
+    break;
+  }
    case Intrinsic::experimental_stepvector: {
      VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
      Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll

index 8229336..38bd644 100644 (file)
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll
@@ -62,15 +62,6 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 {
    ret <16 x float> %res
  }
  
-; Verify out-of-bounds index results in undef vector.
-define <2 x double> @splice_v2f64_idx_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: splice_v2f64_idx_out_of_bounds:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
-  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2)
-  ret <2 x double> %res
-}
-
  ;
  ; VECTOR_SPLICE (trailing elements)
  ;
@@ -130,15 +121,6 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 {
    ret <16 x float> %res
  }
  
-; Verify out-of-bounds trailing element count results in undef vector.
-define <2 x double> @splice_v2f64_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: splice_v2f64_out_of_bounds:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
-  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3)
-  ret <2 x double> %res
-}
-
  declare <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8>, <2 x i8>, i32)
  declare <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8>, <16 x i8>, i32)
  declare <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32>, <8 x i32>, i32)
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

index 91b2281..a03d8e0 100644 (file)
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -24,7 +24,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vsca
    ret <vscale x 16 x i8> %res
  }
  
-define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #1 {
  ; CHECK-LABEL: splice_nxv16i8_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
@@ -33,29 +33,6 @@ define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscal
    ret <vscale x 16 x i8> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: splice_nxv16i8_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    mov x8, #-1
-; CHECK-NEXT:    mov w9, #256
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    addvl x8, x8, #1
-; CHECK-NEXT:    cmp x8, #256
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 256)
-  ret <vscale x 16 x i8> %res
-}
-
  define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
  ; CHECK-LABEL: splice_nxv8i16_first_idx:
  ; CHECK:       // %bb.0:
@@ -65,38 +42,6 @@ define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vsca
    ret <vscale x 8 x i16> %res
  }
  
-define <vscale x 8 x i16> @splice_nxv8i16_last_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: splice_nxv8i16_last_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
-; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 127)
-  ret <vscale x 8 x i16> %res
-}
-
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 8 x i16> @splice_nxv8i16_clamped_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: splice_nxv8i16_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    mov w9, #128
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmp x8, #128
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 128)
-  ret <vscale x 8 x i16> %res
-}
-
  define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
  ; CHECK-LABEL: splice_nxv4i32_first_idx:
  ; CHECK:       // %bb.0:
@@ -106,7 +51,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vsca
    ret <vscale x 4 x i32> %res
  }
  
-define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #1 {
  ; CHECK-LABEL: splice_nxv4i32_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -115,29 +60,6 @@ define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscal
    ret <vscale x 4 x i32> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 4 x i32> @splice_nxv4i32_clamped_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: splice_nxv4i32_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #64
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmp x8, #64
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 64)
-  ret <vscale x 4 x i32> %res
-}
-
  define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
  ; CHECK-LABEL: splice_nxv2i64_first_idx:
  ; CHECK:       // %bb.0:
@@ -147,7 +69,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vsca
    ret <vscale x 2 x i64> %res
  }
  
-define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #1 {
  ; CHECK-LABEL: splice_nxv2i64_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -156,29 +78,6 @@ define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscal
    ret <vscale x 2 x i64> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 2 x i64> @splice_nxv2i64_clamped_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: splice_nxv2i64_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmp x8, #32
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 32)
-  ret <vscale x 2 x i64> %res
-}
-
  define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
  ; CHECK-LABEL: splice_nxv2f16_neg_idx:
  ; CHECK:       // %bb.0:
@@ -219,7 +118,7 @@ define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vs
    ret <vscale x 2 x half> %res
  }
  
-define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
+define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #1 {
  ; CHECK-LABEL: splice_nxv2f16_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -228,31 +127,6 @@ define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vsc
    ret <vscale x 2 x half> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 2 x half> @splice_nxv2f16_clamped_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv2f16_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmp x8, #32
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 32)
-  ret <vscale x 2 x half> %res
-}
-
  define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
  ; CHECK-LABEL: splice_nxv4f16_neg_idx:
  ; CHECK:       // %bb.0:
@@ -293,7 +167,7 @@ define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vs
    ret <vscale x 4 x half> %res
  }
  
-define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
+define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #1 {
  ; CHECK-LABEL: splice_nxv4f16_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -302,31 +176,6 @@ define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vsc
    ret <vscale x 4 x half> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 4 x half> @splice_nxv4f16_clamped_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv4f16_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #64
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmp x8, #64
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 64)
-  ret <vscale x 4 x half> %res
-}
-
  define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
  ; CHECK-LABEL: splice_nxv8f16_first_idx:
  ; CHECK:       // %bb.0:
@@ -336,7 +185,7 @@ define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vs
    ret <vscale x 8 x half> %res
  }
  
-define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #1 {
  ; CHECK-LABEL: splice_nxv8f16_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
@@ -345,29 +194,6 @@ define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vsc
    ret <vscale x 8 x half> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 8 x half> @splice_nxv8f16_clamped_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    mov w9, #128
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmp x8, #128
-; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 128)
-  ret <vscale x 8 x half> %res
-}
-
  define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
  ; CHECK-LABEL: splice_nxv2f32_neg_idx:
  ; CHECK:       // %bb.0:
@@ -408,7 +234,7 @@ define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <
    ret <vscale x 2 x float> %res
  }
  
-define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
+define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #1 {
  ; CHECK-LABEL: splice_nxv2f32_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -417,31 +243,6 @@ define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <v
    ret <vscale x 2 x float> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 2 x float> @splice_nxv2f32_clamped_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv2f32_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmp x8, #32
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    lsl x8, x8, #3
-; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x9, x8]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 32)
-  ret <vscale x 2 x float> %res
-}
-
  define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
  ; CHECK-LABEL: splice_nxv4f32_first_idx:
  ; CHECK:       // %bb.0:
@@ -451,7 +252,7 @@ define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <
    ret <vscale x 4 x float> %res
  }
  
-define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #1 {
  ; CHECK-LABEL: splice_nxv4f32_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -460,29 +261,6 @@ define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <v
    ret <vscale x 4 x float> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 4 x float> @splice_nxv4f32_clamped_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv4f32_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    mov w9, #64
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmp x8, #64
-; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 64)
-  ret <vscale x 4 x float> %res
-}
-
  define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
  ; CHECK-LABEL: splice_nxv2f64_first_idx:
  ; CHECK:       // %bb.0:
@@ -492,7 +270,7 @@ define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a,
    ret <vscale x 2 x double> %res
  }
  
-define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
  ; CHECK-LABEL: splice_nxv2f64_last_idx:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -501,29 +279,6 @@ define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a,
    ret <vscale x 2 x double> %res
  }
  
-; Ensure index is clamped when we cannot prove it's less than 2048-bit.
-define <vscale x 2 x double> @splice_nxv2f64_clamped_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: splice_nxv2f64_clamped_idx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub x8, x8, #1
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmp x8, #32
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    csel x8, x8, x9, lo
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
-; CHECK-NEXT:    addvl sp, sp, #2
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 32)
-  ret <vscale x 2 x double> %res
-}
-
  ; Ensure predicate based splice is promoted to use ZPRs.
  define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
  ; CHECK-LABEL: splice_nxv2i1_idx:
@@ -617,8 +372,8 @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8
  }
  
  ; Verify splitvec type legalisation works as expected.
-define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv16f32_clamped_idx:
+define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
+; CHECK-LABEL: splice_nxv16f32_16:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-8
@@ -684,9 +439,8 @@ define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16
    ret <vscale x 16 x i8> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 16 x i8> @splice_nxv16i8_clamped(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: splice_nxv16i8_clamped:
+define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
+; CHECK-LABEL: splice_nxv16i8_neg17:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -739,9 +493,8 @@ define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x
    ret <vscale x 8 x i16> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 8 x i16> @splice_nxv8i16_clamped(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: splice_nxv8i16_clamped:
+define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #2 {
+; CHECK-LABEL: splice_nxv8i16_neg9:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -794,9 +547,8 @@ define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x
    ret <vscale x 4 x i32> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 4 x i32> @splice_nxv4i32_clamped(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: splice_nxv4i32_clamped:
+define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #2 {
+; CHECK-LABEL: splice_nxv4i32_neg5:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -849,9 +601,8 @@ define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x
    ret <vscale x 2 x i64> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 2 x i64> @splice_nxv2i64_clamped(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: splice_nxv2i64_clamped:
+define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #2 {
+; CHECK-LABEL: splice_nxv2i64_neg3:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -904,9 +655,8 @@ define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8
    ret <vscale x 8 x half> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 8 x half> @splice_nxv8f16_clamped(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_clamped:
+define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #2 {
+; CHECK-LABEL: splice_nxv8f16_neg9:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -959,9 +709,8 @@ define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x
    ret <vscale x 4 x float> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 4 x float> @splice_nxv4f32_clamped(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv4f32_clamped:
+define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #2 {
+; CHECK-LABEL: splice_nxv4f32_neg5:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -1014,9 +763,8 @@ define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale
    ret <vscale x 2 x double> %res
  }
  
-; Ensure number of trailing elements is clamped when we cannot prove it's less than VL.
-define <vscale x 2 x double> @splice_nxv2f64_clamped(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: splice_nxv2f64_clamped:
+define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #2 {
+; CHECK-LABEL: splice_nxv2f64_neg3:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-2
@@ -1147,8 +895,8 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
  }
  
  ; Verify splitvec type legalisation works as expected.
-define <vscale x 16 x float> @splice_nxv16f32_clamped(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv16f32_clamped:
+define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
+; CHECK-LABEL: splice_nxv16f32_neg17:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
  ; CHECK-NEXT:    addvl sp, sp, #-8
@@ -1198,3 +946,5 @@ declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale
  declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
  
  attributes #0 = { nounwind "target-features"="+sve" }
+attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }
+attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }
diff --git a/llvm/test/Verifier/invalid-splice.ll b/llvm/test/Verifier/invalid-splice.ll

new file mode 100644 (file)

index 0000000..8c0c569
--- /dev/null
+++ b/llvm/test/Verifier/invalid-splice.ll
@@ -0,0 +1,46 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+; Every function below passes an out-of-range immediate to
+; llvm.experimental.vector.splice, so the verifier must report an error for each.
+
+; Fixed-width <2 x double>: -3 is below the lower bound of -2.
+; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
+define <2 x double> @splice_v2f64_idx_neg3(<2 x double> %a, <2 x double> %b) #0 {
+  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3)
+  ret <2 x double> %res
+}
+
+; Scalable vector, vscale_range minimum 1: the bound is 1 * 2, so -3 is too low.
+; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
+define <vscale x 2 x double> @splice_nxv2f64_idx_neg3_vscale_min1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+  %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
+  ret <vscale x 2 x double> %res
+}
+
+; Scalable vector, vscale_range minimum 2: the bound is 2 * 2, so -5 is too low.
+; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
+define <vscale x 2 x double> @splice_nxv2f64_idx_neg5_vscale_min2(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+  %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -5)
+  ret <vscale x 2 x double> %res
+}
+
+; Fixed-width <2 x double>: 2 is above the upper bound of 1.
+; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
+define <2 x double> @splice_v2f64_idx2(<2 x double> %a, <2 x double> %b) #0 {
+  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2)
+  ret <2 x double> %res
+}
+
+; Fixed-width <2 x double> in a function with a vscale_range minimum of 2: 4 is
+; out of range whether or not that minimum is applied to the element count.
+; CHECK: The splice index exceeds the range [-VL, VL-1] where VL is the known minimum number of elements in the vector
+define <2 x double> @splice_v2f64_idx4_vscale_min2(<2 x double> %a, <2 x double> %b) #1 {
+  %res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 4)
+  ret <2 x double> %res
+}
+
+attributes #0 = { vscale_range(1,16) }
+attributes #1 = { vscale_range(2,16) }
+
+declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32)
+declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
author	David Sherwood <david.sherwood@arm.com>
	Fri, 17 Dec 2021 09:39:21 +0000 (09:39 +0000)
committer	David Sherwood <david.sherwood@arm.com>
	Tue, 11 Jan 2022 09:37:39 +0000 (09:37 +0000)
llvm/docs/LangRef.rst		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
llvm/lib/IR/Verifier.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll		patch \| blob \| history
llvm/test/Verifier/invalid-splice.ll	[new file with mode: 0644]	patch \| blob