TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_load8_lane, "V16ScScC*V16ScIi", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_load16_lane, "V8ssC*V8sIi", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_load32_lane, "V4iiC*V4iIi", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_load64_lane, "V2LLiLLiC*V2LLiIi", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_store8_lane, "vSc*V16ScIi", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_store16_lane, "vs*V8sIi", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_store32_lane, "vi*V4iIi", "n", "simd128")
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
return Builder.CreateCall(Callee, {Ptr});
}
- case WebAssembly::BI__builtin_wasm_load8_lane:
- case WebAssembly::BI__builtin_wasm_load16_lane:
- case WebAssembly::BI__builtin_wasm_load32_lane:
- case WebAssembly::BI__builtin_wasm_load64_lane:
case WebAssembly::BI__builtin_wasm_store8_lane:
case WebAssembly::BI__builtin_wasm_store16_lane:
case WebAssembly::BI__builtin_wasm_store32_lane:
Value *LaneIdx = llvm::ConstantInt::get(getLLVMContext(), *LaneIdxConst);
unsigned IntNo;
switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_load8_lane:
- IntNo = Intrinsic::wasm_load8_lane;
- break;
- case WebAssembly::BI__builtin_wasm_load16_lane:
- IntNo = Intrinsic::wasm_load16_lane;
- break;
- case WebAssembly::BI__builtin_wasm_load32_lane:
- IntNo = Intrinsic::wasm_load32_lane;
- break;
- case WebAssembly::BI__builtin_wasm_load64_lane:
- IntNo = Intrinsic::wasm_load64_lane;
- break;
case WebAssembly::BI__builtin_wasm_store8_lane:
IntNo = Intrinsic::wasm_store8_lane;
break;
return (v128_t)(__i64x2){__v, 0};
}
-#define wasm_v128_load8_lane(__ptr, __vec, __i) \
- ((v128_t)__builtin_wasm_load8_lane((const signed char *)(__ptr), \
- (__i8x16)(__vec), (__i)))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_lane( // Reads one int8_t from __mem and returns __vec with lane __i set to it.
+ const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { // NOTE(review): __REQUIRE_CONSTANT is defined outside this hunk; presumably it rejects a non-constant __i -- confirm.
+ struct __wasm_v128_load8_lane_struct { // Single-field wrapper used only to type the read from __mem.
+ int8_t __v;
+ } __attribute__((__packed__, __may_alias__)); // packed + may_alias: intended to keep the read independent of the pointee's declared type and alignment (see GCC/Clang attribute docs).
+ int8_t __v = ((const struct __wasm_v128_load8_lane_struct *)__mem)->__v; // The only memory access: fetch the scalar through the wrapper.
+ __i8x16 __ret = (__i8x16)__vec; // Cast the vector to the __i8x16 lane view so it can be indexed per 8-bit lane.
+ __ret[__i] = __v; // Overwrite lane __i only; every other lane keeps its value from __vec.
+ return (v128_t)__ret; // Cast back to the generic v128_t type.
+}
-#define wasm_v128_load16_lane(__ptr, __vec, __i) \
- ((v128_t)__builtin_wasm_load16_lane((const short *)(__ptr), \
- (__i16x8)(__vec), (__i)))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load16_lane( // Reads one int16_t from __mem and returns __vec with lane __i set to it.
+ const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { // NOTE(review): __REQUIRE_CONSTANT is defined outside this hunk; presumably it rejects a non-constant __i -- confirm.
+ struct __wasm_v128_load16_lane_struct { // Single-field wrapper used only to type the read from __mem.
+ int16_t __v;
+ } __attribute__((__packed__, __may_alias__)); // packed + may_alias: intended to allow an unaligned read that is independent of the pointee's declared type (see GCC/Clang attribute docs).
+ int16_t __v = ((const struct __wasm_v128_load16_lane_struct *)__mem)->__v; // The only memory access: fetch the scalar through the wrapper.
+ __i16x8 __ret = (__i16x8)__vec; // Cast the vector to the __i16x8 lane view so it can be indexed per 16-bit lane.
+ __ret[__i] = __v; // Overwrite lane __i only; every other lane keeps its value from __vec.
+ return (v128_t)__ret; // Cast back to the generic v128_t type.
+}
-#define wasm_v128_load32_lane(__ptr, __vec, __i) \
- ((v128_t)__builtin_wasm_load32_lane((const int *)(__ptr), (__i32x4)(__vec), \
- (__i)))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_lane( // Reads one int32_t from __mem and returns __vec with lane __i set to it.
+ const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { // NOTE(review): __REQUIRE_CONSTANT is defined outside this hunk; presumably it rejects a non-constant __i -- confirm.
+ struct __wasm_v128_load32_lane_struct { // Single-field wrapper used only to type the read from __mem.
+ int32_t __v;
+ } __attribute__((__packed__, __may_alias__)); // packed + may_alias: intended to allow an unaligned read that is independent of the pointee's declared type (see GCC/Clang attribute docs).
+ int32_t __v = ((const struct __wasm_v128_load32_lane_struct *)__mem)->__v; // The only memory access: fetch the scalar through the wrapper.
+ __i32x4 __ret = (__i32x4)__vec; // Cast the vector to the __i32x4 lane view so it can be indexed per 32-bit lane.
+ __ret[__i] = __v; // Overwrite lane __i only; every other lane keeps its value from __vec.
+ return (v128_t)__ret; // Cast back to the generic v128_t type.
+}
-#define wasm_v128_load64_lane(__ptr, __vec, __i) \
- ((v128_t)__builtin_wasm_load64_lane((const long long int *)(__ptr), \
- (__i64x2)(__vec), (__i)))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_lane( // Reads one int64_t from __mem and returns __vec with lane __i set to it.
+ const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { // NOTE(review): __REQUIRE_CONSTANT is defined outside this hunk; presumably it rejects a non-constant __i -- confirm.
+ struct __wasm_v128_load64_lane_struct { // Single-field wrapper used only to type the read from __mem.
+ int64_t __v;
+ } __attribute__((__packed__, __may_alias__)); // packed + may_alias: intended to allow an unaligned read that is independent of the pointee's declared type (see GCC/Clang attribute docs).
+ int64_t __v = ((const struct __wasm_v128_load64_lane_struct *)__mem)->__v; // The only memory access: fetch the scalar through the wrapper.
+ __i64x2 __ret = (__i64x2)__vec; // Cast the vector to the __i64x2 lane view so it can be indexed per 64-bit lane.
+ __ret[__i] = __v; // Overwrite lane __i only; every other lane keeps its value from __vec.
+ return (v128_t)__ret; // Cast back to the generic v128_t type.
+}
static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void *__mem,
v128_t __a) {
// WEBASSEMBLY-NEXT: ret
}
-i8x16 load8_lane(const signed char *p, i8x16 v) {
- return __builtin_wasm_load8_lane(p, v, 0);
- // WEBASSEMBLY: tail call <16 x i8> @llvm.wasm.load8.lane(
- // WEBASSEMBLY-SAME: i8* %p, <16 x i8> %v, i32 0)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i16x8 load16_lane(const short *p, i16x8 v) {
- return __builtin_wasm_load16_lane(p, v, 0);
- // WEBASSEMBLY: tail call <8 x i16> @llvm.wasm.load16.lane(
- // WEBASSEMBLY-SAME: i16* %p, <8 x i16> %v, i32 0)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i32x4 load32_lane(const int *p, i32x4 v) {
- return __builtin_wasm_load32_lane(p, v, 0);
- // WEBASSEMBLY: tail call <4 x i32> @llvm.wasm.load32.lane(
- // WEBASSEMBLY-SAME: i32* %p, <4 x i32> %v, i32 0)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i64x2 load64_lane(const long long *p, i64x2 v) {
- return __builtin_wasm_load64_lane(p, v, 0);
- // WEBASSEMBLY: tail call <2 x i64> @llvm.wasm.load64.lane(
- // WEBASSEMBLY-SAME: i64* %p, <2 x i64> %v, i32 0)
- // WEBASSEMBLY-NEXT: ret
-}
-
void store8_lane(signed char *p, i8x16 v) {
__builtin_wasm_store8_lane(p, v, 0);
// WEBASSEMBLY: call void @llvm.wasm.store8.lane(
// CHECK-LABEL: @test_v128_load8_lane(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.load8.lane(i8* [[PTR:%.*]], <16 x i8> [[TMP0]], i32 15)
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) {
// CHECK-LABEL: @test_v128_load16_lane(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.load16.lane(i16* [[PTR:%.*]], <8 x i16> [[TMP0]], i32 7)
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i32 7
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) {
// CHECK-LABEL: @test_v128_load32_lane(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.load32.lane(i32* [[PTR:%.*]], <4 x i32> [[VEC:%.*]], i32 3)
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) {
return wasm_v128_load32_lane(ptr, vec, 3);
// CHECK-LABEL: @test_v128_load64_lane(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.wasm.load64.lane(i64* [[PTR:%.*]], <2 x i64> [[TMP0]], i32 1)
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
+// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i32 1
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) {
// CHECK-LABEL: @test_v128_any_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10:[0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CHECK-LABEL: @test_v128_bitselect(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
// CHECK-LABEL: @test_i8x16_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_i8x16_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CHECK-LABEL: @test_i8x16_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i8x16_bitmask(v128_t a) {
// CHECK-LABEL: @test_i8x16_popcnt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-LABEL: @test_i16x8_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_i16x8_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CHECK-LABEL: @test_i16x8_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i16x8_bitmask(v128_t a) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-LABEL: @test_i32x4_abs(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_i32x4_abs(v128_t a) {
// CHECK-LABEL: @test_i32x4_all_true(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CHECK-LABEL: @test_i32x4_bitmask(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR8]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
int32_t test_i32x4_bitmask(v128_t a) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_i64x2_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
// CHECK-LABEL: @test_i64x2_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i64x2_bitmask(v128_t a) {
// CHECK-LABEL: @test_f32x4_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f32x4_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f32x4_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f32x4_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f32x4_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f32x4_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-LABEL: @test_f64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f64x2_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f64x2_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f64x2_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f64x2_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_f64x2_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-LABEL: @test_i16x8_narrow_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-LABEL: @test_u16x8_narrow_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR8]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
// ISD::TargetConstant, which would require extra complications in the ISel
// tablegen patterns. TODO: Replace these intrinsics with normal ISel patterns
// once the load_lane instructions are merged to the proposal.
-def int_wasm_load8_lane :
- Intrinsic<[llvm_v16i8_ty],
- [LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly],
- "", [SDNPMemOperand]>;
-def int_wasm_load16_lane :
- Intrinsic<[llvm_v8i16_ty],
- [LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly],
- "", [SDNPMemOperand]>;
-def int_wasm_load32_lane :
- Intrinsic<[llvm_v4i32_ty],
- [LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly],
- "", [SDNPMemOperand]>;
-def int_wasm_load64_lane :
- Intrinsic<[llvm_v2i64_ty],
- [LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly],
- "", [SDNPMemOperand]>;
def int_wasm_store8_lane :
Intrinsic<[],
[LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
- case Intrinsic::wasm_load8_lane:
- case Intrinsic::wasm_load16_lane:
- case Intrinsic::wasm_load32_lane:
- case Intrinsic::wasm_load64_lane:
case Intrinsic::wasm_store8_lane:
case Intrinsic::wasm_store16_lane:
case Intrinsic::wasm_store32_lane:
case Intrinsic::wasm_store64_lane: {
MVT MemVT;
switch (Intrinsic) {
- case Intrinsic::wasm_load8_lane:
case Intrinsic::wasm_store8_lane:
MemVT = MVT::i8;
break;
- case Intrinsic::wasm_load16_lane:
case Intrinsic::wasm_store16_lane:
MemVT = MVT::i16;
break;
- case Intrinsic::wasm_load32_lane:
case Intrinsic::wasm_store32_lane:
MemVT = MVT::i32;
break;
- case Intrinsic::wasm_load64_lane:
case Intrinsic::wasm_store64_lane:
MemVT = MVT::i64;
break;
default:
llvm_unreachable("unexpected intrinsic");
}
- if (Intrinsic == Intrinsic::wasm_load8_lane ||
- Intrinsic == Intrinsic::wasm_load16_lane ||
- Intrinsic == Intrinsic::wasm_load32_lane ||
- Intrinsic == Intrinsic::wasm_load64_lane) {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.flags = MachineMemOperand::MOLoad;
- } else {
- Info.opc = ISD::INTRINSIC_VOID;
- Info.flags = MachineMemOperand::MOStore;
- }
- Info.ptrVal = I.getArgOperand(0);
+ Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = MemVT;
+ Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(1);
+ Info.flags = MachineMemOperand::MOStore;
return true;
}
default:
} // mayLoad = 1, UseNamedOperandTable = 1
}
-// TODO: Also support v4f32 and v2f64 once the instructions are merged
-// to the proposal
defm "" : SIMDLoadLane<I8x16, 0x54>;
defm "" : SIMDLoadLane<I16x8, 0x55>;
defm "" : SIMDLoadLane<I32x4, 0x56>;
Requires<[HasAddr64]>;
}
-defm : LoadLanePatNoOffset<I8x16, int_wasm_load8_lane>;
-defm : LoadLanePatNoOffset<I16x8, int_wasm_load16_lane>;
-defm : LoadLanePatNoOffset<I32x4, int_wasm_load32_lane>;
-defm : LoadLanePatNoOffset<I64x2, int_wasm_load64_lane>;
+// PatFrags describing a scalar load whose result is inserted into lane $idx of
+// $vec. Operands are ordered ($ptr, $vec, $idx). The 8- and 16-bit fragments
+// match extloadi8/extloadi16 producing an i32; the 32- and 64-bit fragments
+// match a plain load of i32/i64.
+def load8_lane :
+ PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+ (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
+def load16_lane :
+ PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+ (vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>;
+def load32_lane :
+ PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+ (vector_insert $vec, (i32 (load $ptr)), $idx)>;
+def load64_lane :
+ PatFrag<(ops node:$ptr, node:$vec, node:$idx),
+ (vector_insert $vec, (i64 (load $ptr)), $idx)>;
+// TODO: Add fragments for floating-point lanes (v4f32, v2f64) as well.
+
+// Instantiate LoadLanePatNoOffset once per integer vector type, pairing each
+// type with the fragment of the matching lane width.
+defm : LoadLanePatNoOffset<I8x16, load8_lane>;
+defm : LoadLanePatNoOffset<I16x8, load16_lane>;
+defm : LoadLanePatNoOffset<I32x4, load32_lane>;
+defm : LoadLanePatNoOffset<I64x2, load64_lane>;
// TODO: Also support the other load patterns for load_lane once the instructions
// are merged to the proposal.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
-; CHECK: i8x16.replace_lane
+; CHECK: v128.load8_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
-; CHECK: i8x16.replace_lane
+; CHECK: v128.load8_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
target triple = "wasm32-unknown-unknown"
-declare <16 x i8> @llvm.wasm.load8.lane(i8*, <16 x i8>, i32)
-declare <8 x i16> @llvm.wasm.load16.lane(i16*, <8 x i16>, i32)
-declare <4 x i32> @llvm.wasm.load32.lane(i32*, <4 x i32>, i32)
-declare <2 x i64> @llvm.wasm.load64.lane(i64*, <2 x i64>, i32)
-
declare void @llvm.wasm.store8.lane(i8*, <16 x i8>, i32)
declare void @llvm.wasm.store16.lane(i16*, <8 x i16>, i32)
declare void @llvm.wasm.store32.lane(i32*, <4 x i32>, i32)
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %p, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %p
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
%q = ptrtoint i8* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i8*
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i8, i8* %p, i32 6
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i8, i8* %p, i32 -6
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
%q = ptrtoint i8* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i8*
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i8, i8* %p, i32 6
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 42 to i8*
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* %s, <16 x i8> %v, i32 0)
+ %x = load i8, i8* %s
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <16 x i8> @llvm.wasm.load8.lane(i8* @gv_i8, <16 x i8> %v, i32 0)
+ %x = load i8, i8* @gv_i8
+ %t = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %p, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %p
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i16* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i16*
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i16, i16* %p, i32 6
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: i32.const -12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i16, i16* %p, i32 -6
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i16* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i16*
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i16, i16* %p, i32 6
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 42 to i16*
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* %s, <8 x i16> %v, i32 0)
+ %x = load i16, i16* %s
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i16
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <8 x i16> @llvm.wasm.load16.lane(i16* @gv_i16, <8 x i16> %v, i32 0)
+ %x = load i16, i16* @gv_i16
+ %t = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %p, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %p
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i32* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i32*
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i32, i32* %p, i32 6
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: i32.const -24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i32, i32* %p, i32 -6
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i32* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i32*
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i32, i32* %p, i32 6
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 42 to i32*
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* %s, <4 x i32> %v, i32 0)
+ %x = load i32, i32* %s
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i32
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <4 x i32> @llvm.wasm.load32.lane(i32* @gv_i32, <4 x i32> %v, i32 0)
+ %x = load i32, i32* @gv_i32
+ %t = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %p, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %p
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i64*
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: i32.const 48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 6
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: i32.const -48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr inbounds i64, i64* %p, i32 -6
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint i64* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i64*
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: i32.const 48
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = getelementptr i64, i64* %p, i32 6
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 42
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%s = inttoptr i32 42 to i64*
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* %s, <2 x i64> %v, i32 0)
+ %x = load i64, i64* %s
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_i64
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
- %t = tail call <2 x i64> @llvm.wasm.load64.lane(i64* @gv_i64, <2 x i64> %v, i32 0)
+ %x = load i64, i64* @gv_i64
+ %t = insertelement <2 x i64> %v, i64 %x, i32 0
ret <2 x i64> %t
}
ret <16 x i8> %v2
}
+; An i8 load with align 1 inserted into lane 0 is expected to become a single
+; v128.load8_lane. 1 is the default alignment for v128.load8_lane so no
+; attribute is needed.
+define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a1:
+; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load8_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i8, i8* %p, align 1
+ %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+ ret <16 x i8> %v1
+}
+
+; An i8 load with align 2 inserted into lane 0. 2 is greater than the default
+; alignment (1) for v128.load8_lane so it is ignored and no alignment
+; attribute is expected on the instruction.
+define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) {
+; CHECK-LABEL: load_lane_i8_a2:
+; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load8_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i8, i8* %p, align 2
+ %v1 = insertelement <16 x i8> %v, i8 %e, i32 0
+ ret <16 x i8> %v1
+}
+
; ==============================================================================
; 8 x i16
; ==============================================================================
ret <8 x i16> %v2
}
+; An i16 load with align 1 inserted into lane 0. 1 is less than the default
+; alignment (2) for v128.load16_lane, so an explicit p2align=0 attribute is
+; expected on the instruction.
+define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a1:
+; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i16, i16* %p, align 1
+ %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+ ret <8 x i16> %v1
+}
+
+; An i16 load with align 2 inserted into lane 0. 2 is the default alignment
+; for v128.load16_lane so no attribute is needed.
+define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a2:
+; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load16_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i16, i16* %p, align 2
+ %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+ ret <8 x i16> %v1
+}
+
+; An i16 load with align 4 inserted into lane 0. 4 is greater than the default
+; alignment (2) for v128.load16_lane so it is ignored and no alignment
+; attribute is expected on the instruction.
+define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) {
+; CHECK-LABEL: load_lane_i16_a4:
+; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load16_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i16, i16* %p, align 4
+ %v1 = insertelement <8 x i16> %v, i16 %e, i32 0
+ ret <8 x i16> %v1
+}
+
; ==============================================================================
; 4 x i32
; ==============================================================================
ret <4 x i32> %v2
}
+; An i32 load with align 1 inserted into lane 0. 1 is less than the default
+; alignment (4) for v128.load32_lane, so an explicit p2align=0 attribute is
+; expected on the instruction.
+define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a1:
+; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i32, i32* %p, align 1
+ %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+ ret <4 x i32> %v1
+}
+
+; An i32 load with align 2 inserted into lane 0. 2 is less than the default
+; alignment (4) for v128.load32_lane, so an explicit p2align=1 attribute is
+; expected on the instruction.
+define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a2:
+; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load32_lane 0:p2align=1, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i32, i32* %p, align 2
+ %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+ ret <4 x i32> %v1
+}
+
+; An i32 load with align 4 inserted into lane 0. 4 is the default alignment
+; for v128.load32_lane so no attribute is needed.
+define <4 x i32> @load_lane_i32_a4(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a4:
+; CHECK: .functype load_lane_i32_a4 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load32_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i32, i32* %p, align 4
+ %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+ ret <4 x i32> %v1
+}
+
+; 8 is greater than the default alignment so it is ignored: the expected
+; v128.load32_lane is printed without a p2align annotation, the same output as
+; the align 4 case.
+define <4 x i32> @load_lane_i32_a8(i32* %p, <4 x i32> %v) {
+; CHECK-LABEL: load_lane_i32_a8:
+; CHECK: .functype load_lane_i32_a8 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load32_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i32, i32* %p, align 8
+ %v1 = insertelement <4 x i32> %v, i32 %e, i32 0
+ ret <4 x i32> %v1
+}
+
; ==============================================================================
; 2 x i64
; ==============================================================================
ret <2 x i64> %v2
}
+; An i64 load with align 1 inserted into lane 0 of a <2 x i64> is expected to be
+; emitted as v128.load64_lane carrying an explicit p2align=0 annotation.
+define <2 x i64> @load_lane_i64_a1(i64* %p, <2 x i64> %v) {
+; CHECK-LABEL: load_lane_i64_a1:
+; CHECK: .functype load_lane_i64_a1 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i64, i64* %p, align 1
+ %v1 = insertelement <2 x i64> %v, i64 %e, i32 0
+ ret <2 x i64> %v1
+}
+
+; An i64 load with align 2 inserted into lane 0 of a <2 x i64> is expected to be
+; emitted as v128.load64_lane carrying an explicit p2align=1 annotation.
+define <2 x i64> @load_lane_i64_a2(i64* %p, <2 x i64> %v) {
+; CHECK-LABEL: load_lane_i64_a2:
+; CHECK: .functype load_lane_i64_a2 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load64_lane 0:p2align=1, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i64, i64* %p, align 2
+ %v1 = insertelement <2 x i64> %v, i64 %e, i32 0
+ ret <2 x i64> %v1
+}
+
+; An i64 load with align 4 inserted into lane 0 of a <2 x i64> is expected to be
+; emitted as v128.load64_lane carrying an explicit p2align=2 annotation.
+define <2 x i64> @load_lane_i64_a4(i64* %p, <2 x i64> %v) {
+; CHECK-LABEL: load_lane_i64_a4:
+; CHECK: .functype load_lane_i64_a4 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load64_lane 0:p2align=2, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i64, i64* %p, align 4
+ %v1 = insertelement <2 x i64> %v, i64 %e, i32 0
+ ret <2 x i64> %v1
+}
+
+; 8 is the default alignment for v128.load64_lane so no attribute is needed:
+; the expected instruction is printed without a p2align annotation.
+define <2 x i64> @load_lane_i64_a8(i64* %p, <2 x i64> %v) {
+; CHECK-LABEL: load_lane_i64_a8:
+; CHECK: .functype load_lane_i64_a8 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load64_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i64, i64* %p, align 8
+ %v1 = insertelement <2 x i64> %v, i64 %e, i32 0
+ ret <2 x i64> %v1
+}
+
+; 16 is greater than the default alignment so it is ignored: the expected
+; v128.load64_lane is printed without a p2align annotation, the same output as
+; the align 8 case.
+define <2 x i64> @load_lane_i64_a16(i64* %p, <2 x i64> %v) {
+; CHECK-LABEL: load_lane_i64_a16:
+; CHECK: .functype load_lane_i64_a16 (i32, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: v128.load64_lane 0, 0
+; CHECK-NEXT: # fallthrough-return
+ %e = load i64, i64* %p, align 16
+ %v1 = insertelement <2 x i64> %v, i64 %e, i32 0
+ ret <2 x i64> %v1
+}
+
; ==============================================================================
; 4 x float
; ==============================================================================