break;
}
case Intrinsic::experimental_vector_insert: {
- VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
- VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+ Value *Vec = Call.getArgOperand(0);
+ Value *SubVec = Call.getArgOperand(1);
+ Value *Idx = Call.getArgOperand(2);
+ unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(Vec->getType());
+ VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+
+ ElementCount VecEC = VecTy->getElementCount();
+ ElementCount SubVecEC = SubVecTy->getElementCount();
Assert(VecTy->getElementType() == SubVecTy->getElementType(),
"experimental_vector_insert parameters must have the same element "
"type.",
&Call);
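+ // For example, inserting a <4 x i32> subvector at index 2 is rejected by
+ // the check below, since 2 is not a multiple of the subvector's known
+ // minimum length (4).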
+ Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
+ "experimental_vector_insert index must be a constant multiple of "
+ "the subvector's known minimum vector length.");
+
+ // If this insertion is not the 'mixed' case where a fixed vector is
+ // inserted into a scalable vector, ensure that the insertion of the
+ // subvector does not overrun the parent vector.
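+ // For example, inserting a <3 x i32> subvector into an <8 x i32> vector
+ // at index 6 would overrun, since 6 + 3 > 8.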
+ if (VecEC.isScalable() == SubVecEC.isScalable()) {
+ Assert(
+ IdxN < VecEC.getKnownMinValue() &&
+ IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+ "subvector operand of experimental_vector_insert would overrun the "
+ "vector being inserted into.");
+ }
break;
}
case Intrinsic::experimental_vector_extract: {
+ Value *Vec = Call.getArgOperand(0);
+ Value *Idx = Call.getArgOperand(1);
+ unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
VectorType *ResultTy = cast<VectorType>(Call.getType());
- VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+ VectorType *VecTy = cast<VectorType>(Vec->getType());
+
+ ElementCount VecEC = VecTy->getElementCount();
+ ElementCount ResultEC = ResultTy->getElementCount();
Assert(ResultTy->getElementType() == VecTy->getElementType(),
"experimental_vector_extract result must have the same element "
"type as the input vector.",
&Call);
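+ // For example, extracting a <4 x i32> result from an <8 x i32> vector at
+ // index 1 is rejected by the check below, since 1 is not a multiple of
+ // the result type's known minimum length (4).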
+ Assert(IdxN % ResultEC.getKnownMinValue() == 0,
+ "experimental_vector_extract index must be a constant multiple of "
+ "the result type's known minimum vector length.");
+
+ // If this extraction is not the 'mixed' case where a fixed vector is
+ // extracted from a scalable vector, ensure that the extraction does not
+ // overrun the parent vector.
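+ // For example, extracting a <3 x i32> result from an <8 x i32> vector at
+ // index 6 would overrun, since 6 + 3 > 8.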
+ if (VecEC.isScalable() == ResultEC.isScalable()) {
+ Assert(IdxN < VecEC.getKnownMinValue() &&
+ IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+ "experimental_vector_extract would overrun.");
+ }
break;
}
case Intrinsic::experimental_noalias_scope_decl: {
unsigned SubVecNumElts = SubVecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
- // The result of this call is undefined if IdxN is not a constant multiple
- // of the SubVec's minimum vector length OR the insertion overruns Vec.
- if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
- replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
- return eraseInstFromFunction(CI);
- }
-
// An insert that entirely overwrites Vec with SubVec is a nop.
if (VecNumElts == SubVecNumElts) {
replaceInstUsesWith(CI, SubVec);
unsigned VecNumElts = VecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
- // The result of this call is undefined if IdxN is not a constant multiple
- // of the result type's minimum vector length OR the extraction overruns
- // Vec.
- if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
- replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
- return eraseInstFromFunction(CI);
- }
-
// Extracting the entirety of Vec is a nop.
if (VecNumElts == DstNumElts) {
replaceInstUsesWith(CI, Vec);
}
; Goes through memory currently; idx != 0.
-define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
-; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #2
+; CHECK-NEXT: cmp x9, #2 // =2
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: ldur q0, [sp, #8]
+; CHECK-NEXT: lsl x8, x8, #3
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
-%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
ret <2 x i64> %retval
}
}
; Goes through memory currently; idx != 0.
-define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntw x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: cmp x9, #4 // =4
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: ldur q0, [sp, #4]
+; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+ %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
ret <4 x i32> %retval
}
}
; Goes through memory currently; idx != 0.
-define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
-; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cnth x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: cmp x9, #8 // =8
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: ldur q0, [sp, #2]
+; CHECK-NEXT: lsl x8, x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+ %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
ret <8 x i16> %retval
}
}
; Goes through memory currently; idx != 0.
-define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
-; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x9, #1
+; CHECK-NEXT: sub x9, x9, #1 // =1
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov w8, #16
+; CHECK-NEXT: cmp x9, #16 // =16
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
-; CHECK-NEXT: ldur q0, [sp, #1]
+; CHECK-NEXT: csel x8, x9, x8, lo
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+ %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
ret <16 x i8> %retval
}
ret <vscale x 2 x i64> %retval
}
-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #2
+; CHECK-NEXT: cmp x9, #2 // =2
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsl x8, x8, #3
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: stur q1, [sp, #8]
+; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+ %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
ret <vscale x 2 x i64> %retval
}
ret <vscale x 4 x i32> %retval
}
-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntw x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: cmp x9, #4 // =4
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: stur q1, [sp, #4]
+; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+ %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
ret <vscale x 4 x i32> %retval
}
ret <vscale x 8 x i16> %retval
}
-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cnth x9
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: cmp x9, #8 // =8
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: lsl x8, x8, #1
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: stur q1, [sp, #2]
+; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+ %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
ret <vscale x 8 x i16> %retval
}
ret <vscale x 16 x i8> %retval
}
-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x9, #1
+; CHECK-NEXT: sub x9, x9, #1 // =1
+; CHECK-NEXT: mov w8, #16
+; CHECK-NEXT: cmp x9, #16 // =16
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: csel x8, x9, x8, lo
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
-; CHECK-NEXT: stur q1, [sp, #1]
+; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
- %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+ %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
ret <vscale x 16 x i8> %retval
}
ret void
}
-define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
-; CHECK-LABEL: extract_v8i1_nxv2i1_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT: vslidedown.vi v25, v25, 2
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmsne.vi v25, v25, 0
-; CHECK-NEXT: vse1.v v25, (a0)
-; CHECK-NEXT: ret
- %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
- store <8 x i1> %c, <8 x i1>* %y
- ret void
-}
-
define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-LABEL: extract_v8i1_nxv64i1_0:
; CHECK: # %bb.0:
ret <vscale x 8 x i32> %v
}
-define <vscale x 8 x i32> @insert_nxv8i32_v8i32_4(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT: vle32.v v28, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX2-NEXT: vslideup.vi v8, v28, 4
-; LMULMAX2-NEXT: ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vle32.v v28, (a0)
-; LMULMAX1-NEXT: addi a0, a0, 16
-; LMULMAX1-NEXT: vle32.v v12, (a0)
-; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v28, 4
-; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v12, 8
-; LMULMAX1-NEXT: ret
- %sv = load <8 x i32>, <8 x i32>* %svp
- %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 4)
- ret <vscale x 8 x i32> %v
-}
-
define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX2: # %bb.0:
ret <vscale x 2 x i1> %c
}
-define <vscale x 2 x i1> @insert_nxv2i1_v4i1_6(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
-; CHECK-LABEL: insert_nxv2i1_v4i1_6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vle1.v v27, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vmv1r.v v0, v27
-; CHECK-NEXT: vmerge.vim v26, v26, 1, v0
-; CHECK-NEXT: vsetivli zero, 10, e8, mf4, tu, mu
-; CHECK-NEXT: vslideup.vi v25, v26, 6
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmsne.vi v0, v25, 0
-; CHECK-NEXT: ret
- %sv = load <4 x i1>, <4 x i1>* %svp
- %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 6)
- ret <vscale x 2 x i1> %c
-}
-
define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v4i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef,
- <2 x i64> <i64 1, i64 1>, i64 1)
+ <2 x i64> <i64 1, i64 1>, i64 2)
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2 , i64 0)
%4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
%5 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %3, <vscale x 2 x i64> %4)
}
; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT: ret <4 x i32> undef
-;
- %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
- ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT: ret <10 x i32> undef
-;
- %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
- ret <10 x i32> %1
-}
-
-; ============================================================================ ;
; Scalable cases
; ============================================================================ ;
}
; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT: ret <8 x i32> undef
-;
- %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
- ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT: ret <8 x i32> undef
-;
- %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
- ret <8 x i32> %1
-}
-
-; ============================================================================ ;
; Scalable cases
; ============================================================================ ;
--- /dev/null
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+;
+; Test that extraction/insertion indices are validated.
+;
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+ %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+ ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+ ret <8 x i32> %1
+}
+
+;
+; Test that extractions/insertions which 'overrun' are captured.
+;
+
+; CHECK: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_fixed_fixed(<8 x i32> %vec) {
+ %1 = call <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32> %vec, i64 6)
+ ret <3 x i32> %1
+}
+
+; CHECK: experimental_vector_extract would overrun.
+define <vscale x 3 x i32> @extract_overrun_scalable_scalable(<vscale x 8 x i32> %vec) {
+ %1 = call <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, i64 6)
+ ret <vscale x 3 x i32> %1
+}
+
+; We cannot statically check whether or not an extraction of a fixed vector
+; from a scalable vector would overrun, because we can't compare the sizes of
+; the two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_extract
+define <3 x i32> @extract_overrun_scalable_fixed(<vscale x 8 x i32> %vec) {
+ %1 = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 6)
+ ret <3 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <8 x i32> @insert_overrun_fixed_fixed(<8 x i32> %vec, <3 x i32> %subvec) {
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 6)
+ ret <8 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_scalable(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec) {
+ %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec, i64 6)
+ ret <vscale x 8 x i32> %1
+}
+
+; We cannot statically check whether or not an insertion of a fixed vector into
+; a scalable vector would overrun, because we can't compare the sizes of the
+; two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_insert
+define <vscale x 8 x i32> @insert_overrun_scalable_fixed(<vscale x 8 x i32> %vec, <3 x i32> %subvec) {
+ %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32> %vec, <3 x i32> %subvec, i64 6)
+ ret <vscale x 8 x i32> %1
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32>, <3 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32>, <3 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)