[ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>;
def int_vp_scatter: DefaultAttrsIntrinsic<[],
- [ llvm_anyvector_ty,
- LLVMVectorOfAnyPointersToElt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty],
- [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers
+ [ llvm_anyvector_ty,
+ LLVMVectorOfAnyPointersToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO: allow NoCapture for the vector-of-pointers operand
// Experimental strided memory accesses
def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[],
llvm_i32_ty],
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
-// Speculatable Binary operators
-let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+// Operators: integer and floating-point arithmetic, casts, shuffles, comparisons and reductions
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ // Integer arithmetic
def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
-}
-
-// Non-speculatable binary operators.
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-}
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
-// Floating-point arithmetic.
-let IntrProperties =
- [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ // Floating-point arithmetic
def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-
+ [ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-}
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
-// Casts.
-def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-
-// Shuffles.
-def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- llvm_i32_ty]>;
-
-def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- llvm_i32_ty]>;
-
-// Comparisons.
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
- def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
- [ llvm_anyvector_ty,
- LLVMMatchType<0>,
- llvm_metadata_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ // Casts
+ def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ // Shuffles
+ def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+ def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+
+ // Comparisons
+ def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
- [ llvm_anyvector_ty,
- LLVMMatchType<0>,
- llvm_metadata_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-}
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
-// Reductions
-let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ // Reductions
def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
}
def int_get_active_lane_mask:
// CHECK-DAG: declare i1 @llvm.coro.end(i8*, i1)
// CHECK-DAG: declare i8* @llvm.coro.free(token, i8* nocapture readonly)
// CHECK-DAG: declare void @llvm.coro.resume(i8*)
-// CHECK-DAG: declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
+// CHECK-DAG: declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
// CHECK-DAG: declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
// CHECK-DAG: declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
// CHECK-DAG: declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
// CHECK-DAG: declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
-// CHECK-DAG: declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #0
-// CHECK-DAG: declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) #0
-// CHECK-DAG: declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12
-// CHECK-DAG: declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #12
+// CHECK-DAG: declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.or.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.xor.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.smax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.smin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #2
+// CHECK-DAG: declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) #2
// CHECK-DAG: declare void @llvm.experimental.vp.strided.store.v8i32.p0i32.i32(<8 x i32>, i32* nocapture, i32, <8 x i1>, i32) #4
// CHECK-DAG: declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i32(i32* nocapture, i32, <8 x i1>, i32) #3
-// CHECK-DAG: declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) #12
-// CHECK-DAG: declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) #12
+// CHECK-DAG: declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i64> @llvm.vp.ptrtoint.v8i64.v8p0i32(<8 x i32*>, <8 x i1>, i32) #2
+// CHECK-DAG: declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i64(<8 x i64>, <8 x i1>, i32) #2