From: Thomas Lively <tlively@google.com>
Date: Tue, 4 May 2021 23:11:00 +0000 (-0700)
Subject: [WebAssembly] Add codegen test for wasm_simd128.h
X-Git-Tag: llvmorg-14-init~7608
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f3b769e82ff3340e96c2e50ac2bd6361fbc3d797;p=platform%2Fupstream%2Fllvm.git

[WebAssembly] Add codegen test for wasm_simd128.h

We previously did not have tests demonstrating that the intrinsics in
wasm_simd128.h lower to reasonable LLVM IR. This commit adds such a test.

Differential Revision: https://reviews.llvm.org/D101805
---

diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 0f3529a..ceee5ec 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1,61 +1,2861 @@
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -fsyntax-only -ffreestanding %s -verify
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -fsyntax-only -ffreestanding -flax-vector-conversions=none %s -verify
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -fsyntax-only -ffreestanding -x c++ %s -verify
-// expected-no-diagnostics
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --force-update
+// REQUIRES: webassembly-registered-target
 
-#if defined(__wasm__) && defined(__wasm_simd128__)
+// RUN: %clang %s -O2 -emit-llvm -S -o - -target wasm32-unknown-unknown -msimd128 | FileCheck %s
 
 #include <wasm_simd128.h>
 
-// Test that macros are correct as well
-int main() {
-  v128_t v;
-  v = wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-  v = wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
-  v = wasm_i32x4_const(0, 1, 2, 3);
-  v = wasm_i64x2_const(0, 1);
-  v = wasm_f32x4_const(0.0, 1.0, 2.0, 3.0);
-  v = wasm_f64x2_const(0.0, 1.0);
-
-  int8_t i8 = wasm_i8x16_extract_lane(v, 0);
-  uint8_t u8 = wasm_u8x16_extract_lane(v, 15);
-  v = wasm_i8x16_replace_lane(v, 0, 42);
-
-  int16_t i16 = wasm_i16x8_extract_lane(v, 0);
-  uint16_t u16 = wasm_u16x8_extract_lane(v, 7);
-  v = wasm_i16x8_replace_lane(v, 0, 42);
-
-  int32_t i32 = wasm_i32x4_extract_lane(v, 0);
-  v = wasm_i32x4_replace_lane(v, 0, 42);
-
-  int64_t i64 = wasm_i64x2_extract_lane(v, 0);
-  v = wasm_i64x2_replace_lane(v, 0, 42);
-
-  float f32 = wasm_f32x4_extract_lane(v, 0);
-  v = wasm_f32x4_replace_lane(v, 0, 42.0);
-
-  double f64 = wasm_f64x2_extract_lane(v, 0);
-  v = wasm_f64x2_replace_lane(v, 0, 42.0);
-
-  wasm_v8x16_shuffle(v, v, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-  wasm_v16x8_shuffle(v, v, 0, 1, 2, 3, 4, 5, 6, 7);
-  wasm_v32x4_shuffle(v, v, 0, 1, 2, 3);
-  wasm_v64x2_shuffle(v, v, 0, 1);
-
-  uint8_t lane8 = 0;
-  uint16_t lane16 = 0;
-  uint32_t lane32 = 0;
-  uint64_t lane64 = 0;
-  v = wasm_v128_load8_lane(&lane8, v, 0);
-  v = wasm_v128_load16_lane(&lane16, v, 0);
-  v = wasm_v128_load32_lane(&lane32, v, 0);
-  v = wasm_v128_load64_lane(&lane64, v, 0);
-  wasm_v128_store8_lane(&lane8, v, 15);
-  wasm_v128_store16_lane(&lane16, v, 7);
-  wasm_v128_store32_lane(&lane32, v, 3);
-  wasm_v128_store64_lane(&lane64, v, 1);
-
-  return 0;
-}
-
-#endif
+// CHECK-LABEL: @test_v128_load(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_v128_load(const void *mem) {
+  return wasm_v128_load(mem);
+}
+
+// CHECK-LABEL: @test_v128_load8_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_v128_load8_splat(const void *mem) {
+  return wasm_v128_load8_splat(mem);
+}
+
+// CHECK-LABEL: @test_v128_load16_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i16*
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_v128_load16_splat(const void *mem) {
+  return wasm_v128_load16_splat(mem);
+}
+
+// CHECK-LABEL: @test_v128_load32_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
+//
+v128_t test_v128_load32_splat(const void *mem) {
+  return wasm_v128_load32_splat(mem);
+}
+
+// CHECK-LABEL: @test_v128_load64_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> undef, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_v128_load64_splat(const void *mem) {
+  return wasm_v128_load64_splat(mem);
+}
+
+// CHECK-LABEL: @test_i16x8_load8x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i16x8_load8x8(const void *mem) {
+  return wasm_i16x8_load8x8(mem);
+}
+
+// CHECK-LABEL: @test_u16x8_load8x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u16x8_load8x8(const void *mem) {
+  return wasm_u16x8_load8x8(mem);
+}
+
+// CHECK-LABEL: @test_i32x4_load16x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_i32x4_load16x4(const void *mem) {
+  return wasm_i32x4_load16x4(mem);
+}
+
+// CHECK-LABEL: @test_u32x4_load16x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_u32x4_load16x4(const void *mem) {
+  return wasm_u32x4_load16x4(mem);
+}
+
+// CHECK-LABEL: @test_i64x2_load32x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_load32x2(const void *mem) {
+  return wasm_i64x2_load32x2(mem);
+}
+
+// CHECK-LABEL: @test_u64x2_load32x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u64x2_load32x2(const void *mem) {
+  return wasm_u64x2_load32x2(mem);
+}
+
+// CHECK-LABEL: @test_v128_load32_zero(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 0
+// CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
+//
+v128_t test_v128_load32_zero(const void *mem) {
+  return wasm_v128_load32_zero(mem);
+}
+
+// CHECK-LABEL: @test_v128_load64_zero(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_v128_load64_zero(const void *mem) {
+  return wasm_v128_load64_zero(mem);
+}
+
+// CHECK-LABEL: @test_v128_load8_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.load8.lane(i8* [[PTR:%.*]], <16 x i8> [[TMP0]], i32 15)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_v128_load8_lane(uint8_t *ptr, v128_t vec) {
+  return wasm_v128_load8_lane(ptr, vec, 15);
+}
+
+// CHECK-LABEL: @test_v128_load16_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.load16.lane(i16* [[PTR:%.*]], <8 x i16> [[TMP0]], i32 7)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_v128_load16_lane(uint16_t *ptr, v128_t vec) {
+  return wasm_v128_load16_lane(ptr, vec, 7);
+}
+
+// CHECK-LABEL: @test_v128_load32_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.load32.lane(i32* [[PTR:%.*]], <4 x i32> [[VEC:%.*]], i32 3)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_v128_load32_lane(uint32_t *ptr, v128_t vec) {
+  return wasm_v128_load32_lane(ptr, vec, 3);
+}
+
+// CHECK-LABEL: @test_v128_load64_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.wasm.load64.lane(i64* [[PTR:%.*]], <2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_v128_load64_lane(uint64_t *ptr, v128_t vec) {
+  return wasm_v128_load64_lane(ptr, vec, 1);
+}
+
+// CHECK-LABEL: @test_v128_store(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
+// CHECK-NEXT:    store <4 x i32> [[A:%.*]], <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+void test_v128_store(void *mem, v128_t a) {
+  return wasm_v128_store(mem, a);
+}
+
+// CHECK-LABEL: @test_v128_store8_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
+// CHECK-NEXT:    tail call void @llvm.wasm.store8.lane(i8* [[PTR:%.*]], <16 x i8> [[TMP0]], i32 15)
+// CHECK-NEXT:    ret void
+//
+void test_v128_store8_lane(uint8_t *ptr, v128_t vec) {
+  return wasm_v128_store8_lane(ptr, vec, 15);
+}
+
+// CHECK-LABEL: @test_v128_store16_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
+// CHECK-NEXT:    tail call void @llvm.wasm.store16.lane(i16* [[PTR:%.*]], <8 x i16> [[TMP0]], i32 7)
+// CHECK-NEXT:    ret void
+//
+void test_v128_store16_lane(uint16_t *ptr, v128_t vec) {
+  return wasm_v128_store16_lane(ptr, vec, 7);
+}
+
+// CHECK-LABEL: @test_v128_store32_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.wasm.store32.lane(i32* [[PTR:%.*]], <4 x i32> [[VEC:%.*]], i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_v128_store32_lane(uint32_t *ptr, v128_t vec) {
+  return wasm_v128_store32_lane(ptr, vec, 3);
+}
+
+// CHECK-LABEL: @test_v128_store64_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
+// CHECK-NEXT:    tail call void @llvm.wasm.store64.lane(i64* [[PTR:%.*]], <2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret void
+//
+void test_v128_store64_lane(uint64_t *ptr, v128_t vec) {
+  return wasm_v128_store64_lane(ptr, vec, 1);
+}
+
+// CHECK-LABEL: @test_i8x16_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i32 7
+// CHECK-NEXT:    [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i32 8
+// CHECK-NEXT:    [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i32 9
+// CHECK-NEXT:    [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i32 10
+// CHECK-NEXT:    [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i32 11
+// CHECK-NEXT:    [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i32 12
+// CHECK-NEXT:    [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i32 13
+// CHECK-NEXT:    [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i32 14
+// CHECK-NEXT:    [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i32 15
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
+  return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
+}
+
+// CHECK-LABEL: @test_i16x8_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i32 7
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
+  return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
+}
+
+// CHECK-LABEL: @test_i32x4_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i32 3
+// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
+//
+v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
+  return wasm_i32x4_make(c0, c1, c2, c3);
+}
+
+// CHECK-LABEL: @test_i64x2_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i64x2_make(int64_t c0, int64_t c1) {
+  return wasm_i64x2_make(c0, c1);
+}
+
+// CHECK-LABEL: @test_f32x4_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i32 3
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f32x4_make(float c0, float c1, float c2, float c3) {
+  return wasm_f32x4_make(c0, c1, c2, c3);
+}
+
+// CHECK-LABEL: @test_f64x2_make(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[C0:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f64x2_make(double c0, double c1) {
+  return wasm_f64x2_make(c0, c1);
+}
+
+// CHECK-LABEL: @test_i8x16_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
+//
+v128_t test_i8x16_const() {
+  return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+// CHECK-LABEL: @test_i16x8_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
+//
+v128_t test_i16x8_const() {
+  return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+// CHECK-LABEL: @test_i32x4_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+//
+v128_t test_i32x4_const() {
+  return wasm_i32x4_const(0, 1, 2, 3);
+}
+
+// CHECK-LABEL: @test_i64x2_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+//
+v128_t test_i64x2_const() {
+  return wasm_i64x2_const(0, 1);
+}
+
+// CHECK-LABEL: @test_f32x4_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
+//
+v128_t test_f32x4_const() {
+  return wasm_f32x4_const(0, 1, 2, 3);
+}
+
+// CHECK-LABEL: @test_f64x2_const(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
+//
+v128_t test_f64x2_const() {
+  return wasm_f64x2_const(0, 1);
+}
+
+// CHECK-LABEL: @test_i8x16_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i8x16_splat(int8_t a) {
+  return wasm_i8x16_splat(a);
+}
+
+// CHECK-LABEL: @test_i8x16_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
+// CHECK-NEXT:    ret i8 [[TMP1]]
+//
+int8_t test_i8x16_extract_lane(v128_t a) {
+  return wasm_i8x16_extract_lane(a, 15);
+}
+
+// CHECK-LABEL: @test_u8x16_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <16 x i8> [[TMP0]], i32 15
+// CHECK-NEXT:    ret i8 [[TMP1]]
+//
+uint8_t test_u8x16_extract_lane(v128_t a) {
+  return wasm_u8x16_extract_lane(a, 15);
+}
+
+// CHECK-LABEL: @test_i8x16_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i32 15
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_replace_lane(v128_t a, int8_t b) {
+  return wasm_i8x16_replace_lane(a, 15, b);
+}
+
+// CHECK-LABEL: @test_i16x8_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i16x8_splat(int16_t a) {
+  return wasm_i16x8_splat(a);
+}
+
+// CHECK-LABEL: @test_i16x8_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
+// CHECK-NEXT:    ret i16 [[TMP1]]
+//
+int16_t test_i16x8_extract_lane(v128_t a) {
+  return wasm_i16x8_extract_lane(a, 7);
+}
+
+// CHECK-LABEL: @test_u16x8_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
+// CHECK-NEXT:    ret i16 [[TMP1]]
+//
+uint16_t test_u16x8_extract_lane(v128_t a) {
+  return wasm_u16x8_extract_lane(a, 7);
+}
+
+// CHECK-LABEL: @test_i16x8_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i32 7
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_replace_lane(v128_t a, int16_t b) {
+  return wasm_i16x8_replace_lane(a, 7, b);
+}
+
+// CHECK-LABEL: @test_i32x4_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
+//
+v128_t test_i32x4_splat(int32_t a) {
+  return wasm_i32x4_splat(a);
+}
+
+// CHECK-LABEL: @test_i32x4_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_i32x4_extract_lane(v128_t a) {
+  return wasm_i32x4_extract_lane(a, 3);
+}
+
+// CHECK-LABEL: @test_i32x4_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 3
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i32x4_replace_lane(v128_t a, int32_t b) {
+  return wasm_i32x4_replace_lane(a, 3, b);
+}
+
+// CHECK-LABEL: @test_i64x2_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> undef, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i64x2_splat(int64_t a) {
+  return wasm_i64x2_splat(a);
+}
+
+// CHECK-LABEL: @test_i64x2_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+// CHECK-NEXT:    ret i64 [[TMP1]]
+//
+int64_t test_i64x2_extract_lane(v128_t a) {
+  return wasm_i64x2_extract_lane(a, 1);
+}
+
+// CHECK-LABEL: @test_i64x2_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_replace_lane(v128_t a, int64_t b) {
+  return wasm_i64x2_replace_lane(a, 1, b);
+}
+
+// CHECK-LABEL: @test_f32x4_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_f32x4_splat(float a) {
+  return wasm_f32x4_splat(a);
+}
+
+// CHECK-LABEL: @test_f32x4_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+// CHECK-NEXT:    ret float [[TMP1]]
+//
+float test_f32x4_extract_lane(v128_t a) {
+  return wasm_f32x4_extract_lane(a, 3);
+}
+
+// CHECK-LABEL: @test_f32x4_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 3
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_replace_lane(v128_t a, float b) {
+  return wasm_f32x4_replace_lane(a, 3, b);
+}
+
+// CHECK-LABEL: @test_f64x2_splat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> undef, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f64x2_splat(double a) {
+  return wasm_f64x2_splat(a);
+}
+
+// CHECK-LABEL: @test_f64x2_extract_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+// CHECK-NEXT:    ret double [[TMP1]]
+//
+double test_f64x2_extract_lane(v128_t a) {
+  return wasm_f64x2_extract_lane(a, 1);
+}
+
+// CHECK-LABEL: @test_f64x2_replace_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_replace_lane(v128_t a, double b) {
+  return wasm_f64x2_replace_lane(a, 1, b);
+}
+
+// CHECK-LABEL: @test_i8x16_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_eq(v128_t a, v128_t b) {
+  return wasm_i8x16_eq(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_ne(v128_t a, v128_t b) {
+  return wasm_i8x16_ne(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_lt(v128_t a, v128_t b) {
+  return wasm_i8x16_lt(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u8x16_lt(v128_t a, v128_t b) {
+  return wasm_u8x16_lt(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_gt(v128_t a, v128_t b) {
+  return wasm_i8x16_gt(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u8x16_gt(v128_t a, v128_t b) {
+  return wasm_u8x16_gt(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_le(v128_t a, v128_t b) {
+  return wasm_i8x16_le(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u8x16_le(v128_t a, v128_t b) {
+  return wasm_u8x16_le(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_ge(v128_t a, v128_t b) {
+  return wasm_i8x16_ge(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u8x16_ge(v128_t a, v128_t b) {
+  return wasm_u8x16_ge(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_eq(v128_t a, v128_t b) {
+  return wasm_i16x8_eq(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_ne(v128_t a, v128_t b) {
+  return wasm_i16x8_ne(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_lt(v128_t a, v128_t b) {
+  return wasm_i16x8_lt(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u16x8_lt(v128_t a, v128_t b) {
+  return wasm_u16x8_lt(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_gt(v128_t a, v128_t b) {
+  return wasm_i16x8_gt(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u16x8_gt(v128_t a, v128_t b) {
+  return wasm_u16x8_gt(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_le(v128_t a, v128_t b) {
+  return wasm_i16x8_le(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u16x8_le(v128_t a, v128_t b) {
+  return wasm_u16x8_le(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_ge(v128_t a, v128_t b) {
+  return wasm_i16x8_ge(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u16x8_ge(v128_t a, v128_t b) {
+  return wasm_u16x8_ge(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_eq(v128_t a, v128_t b) {
+  return wasm_i32x4_eq(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_ne(v128_t a, v128_t b) {
+  return wasm_i32x4_ne(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_lt(v128_t a, v128_t b) {
+  return wasm_i32x4_lt(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_u32x4_lt(v128_t a, v128_t b) {
+  return wasm_u32x4_lt(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_gt(v128_t a, v128_t b) {
+  return wasm_i32x4_gt(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_u32x4_gt(v128_t a, v128_t b) {
+  return wasm_u32x4_gt(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_le(v128_t a, v128_t b) {
+  return wasm_i32x4_le(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_u32x4_le(v128_t a, v128_t b) {
+  return wasm_u32x4_le(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_i32x4_ge(v128_t a, v128_t b) {
+  return wasm_i32x4_ge(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_u32x4_ge(v128_t a, v128_t b) {
+  return wasm_u32x4_ge(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_eq(v128_t a, v128_t b) {
+  return wasm_i64x2_eq(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_ne(v128_t a, v128_t b) {
+  return wasm_i64x2_ne(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_lt(v128_t a, v128_t b) {
+  return wasm_i64x2_lt(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_gt(v128_t a, v128_t b) {
+  return wasm_i64x2_gt(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_le(v128_t a, v128_t b) {
+  return wasm_i64x2_le(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_ge(v128_t a, v128_t b) {
+  return wasm_i64x2_ge(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_eq(v128_t a, v128_t b) {
+  return wasm_f32x4_eq(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_ne(v128_t a, v128_t b) {
+  return wasm_f32x4_ne(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_lt(v128_t a, v128_t b) {
+  return wasm_f32x4_lt(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_gt(v128_t a, v128_t b) {
+  return wasm_f32x4_gt(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_le(v128_t a, v128_t b) {
+  return wasm_f32x4_le(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
+//
+v128_t test_f32x4_ge(v128_t a, v128_t b) {
+  return wasm_f32x4_ge(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_eq(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_eq(v128_t a, v128_t b) {
+  return wasm_f64x2_eq(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_ne(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_ne(v128_t a, v128_t b) {
+  return wasm_f64x2_ne(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_lt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_lt(v128_t a, v128_t b) {
+  return wasm_f64x2_lt(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_gt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_gt(v128_t a, v128_t b) {
+  return wasm_f64x2_gt(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_le(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_le(v128_t a, v128_t b) {
+  return wasm_f64x2_le(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_ge(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_ge(v128_t a, v128_t b) {
+  return wasm_f64x2_ge(a, b);
+}
+
+// CHECK-LABEL: @test_v128_not(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[NEG_I:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    ret <4 x i32> [[NEG_I]]
+//
+v128_t test_v128_not(v128_t a) {
+  return wasm_v128_not(a);
+}
+
+// CHECK-LABEL: @test_v128_and(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[AND_I]]
+//
+v128_t test_v128_and(v128_t a, v128_t b) {
+  return wasm_v128_and(a, b);
+}
+
+// CHECK-LABEL: @test_v128_or(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[OR_I]]
+//
+v128_t test_v128_or(v128_t a, v128_t b) {
+  return wasm_v128_or(a, b);
+}
+
+// CHECK-LABEL: @test_v128_xor(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[XOR_I]]
+//
+v128_t test_v128_xor(v128_t a, v128_t b) {
+  return wasm_v128_xor(a, b);
+}
+
+// CHECK-LABEL: @test_v128_andnot(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[NEG_I:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[NEG_I]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[AND_I]]
+//
+v128_t test_v128_andnot(v128_t a, v128_t b) {
+  return wasm_v128_andnot(a, b);
+}
+
+// CHECK-LABEL: @test_v128_any_true(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10:[0-9]+]]
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_v128_any_true(v128_t a) {
+  return wasm_v128_any_true(a);
+}
+
+// CHECK-LABEL: @test_v128_bitselect(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
+  return wasm_v128_bitselect(a, b, mask);
+}
+
+// CHECK-LABEL: @test_i8x16_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_abs(v128_t a) {
+  return wasm_i8x16_abs(a);
+}
+
+// CHECK-LABEL: @test_i8x16_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i8x16_neg(v128_t a) {
+  return wasm_i8x16_neg(a);
+}
+
+// CHECK-LABEL: @test_i8x16_all_true(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_i8x16_all_true(v128_t a) {
+  return wasm_i8x16_all_true(a);
+}
+
+// CHECK-LABEL: @test_i8x16_bitmask(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int32_t test_i8x16_bitmask(v128_t a) {
+  return wasm_i8x16_bitmask(a);
+}
+
+// CHECK-LABEL: @test_i8x16_popcnt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_popcnt(v128_t a) {
+  return wasm_i8x16_popcnt(a);
+}
+
+// CHECK-LABEL: @test_i8x16_shl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_shl(v128_t a, int32_t b) {
+  return wasm_i8x16_shl(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_shr(v128_t a, int32_t b) {
+  return wasm_i8x16_shr(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u8x16_shr(v128_t a, int32_t b) {
+  return wasm_u8x16_shr(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_add(v128_t a, v128_t b) {
+  return wasm_i8x16_add(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_add_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
+  return wasm_i8x16_add_sat(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_add_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u8x16_add_sat(v128_t a, v128_t b) {
+  return wasm_u8x16_add_sat(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i8x16_sub(v128_t a, v128_t b) {
+  return wasm_i8x16_sub(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_sub_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
+  return wasm_i8x16_sub_sat(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_sub_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
+  return wasm_u8x16_sub_sat(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_i8x16_min(v128_t a, v128_t b) {
+  return wasm_i8x16_min(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_u8x16_min(v128_t a, v128_t b) {
+  return wasm_u8x16_min(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_i8x16_max(v128_t a, v128_t b) {
+  return wasm_i8x16_max(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_u8x16_max(v128_t a, v128_t b) {
+  return wasm_u8x16_max(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_avgr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u8x16_avgr(v128_t a, v128_t b) {
+  return wasm_u8x16_avgr(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_abs(v128_t a) {
+  return wasm_i16x8_abs(a);
+}
+
+// CHECK-LABEL: @test_i16x8_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i16x8_neg(v128_t a) {
+  return wasm_i16x8_neg(a);
+}
+
+// CHECK-LABEL: @test_i16x8_all_true(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_i16x8_all_true(v128_t a) {
+  return wasm_i16x8_all_true(a);
+}
+
+// CHECK-LABEL: @test_i16x8_bitmask(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int32_t test_i16x8_bitmask(v128_t a) {
+  return wasm_i16x8_bitmask(a);
+}
+
+// CHECK-LABEL: @test_i16x8_shl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_shl(v128_t a, int32_t b) {
+  return wasm_i16x8_shl(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_shr(v128_t a, int32_t b) {
+  return wasm_i16x8_shr(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i32 0
+// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_shr(v128_t a, int32_t b) {
+  return wasm_u16x8_shr(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_add(v128_t a, v128_t b) {
+  return wasm_i16x8_add(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_add_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
+  return wasm_i16x8_add_sat(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_add_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_add_sat(v128_t a, v128_t b) {
+  return wasm_u16x8_add_sat(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_sub(v128_t a, v128_t b) {
+  return wasm_i16x8_sub(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_sub_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
+  return wasm_i16x8_sub_sat(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_sub_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_sub_sat(v128_t a, v128_t b) {
+  return wasm_u16x8_sub_sat(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_mul(v128_t a, v128_t b) {
+  return wasm_i16x8_mul(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_i16x8_min(v128_t a, v128_t b) {
+  return wasm_i16x8_min(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_u16x8_min(v128_t a, v128_t b) {
+  return wasm_u16x8_min(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_i16x8_max(v128_t a, v128_t b) {
+  return wasm_i16x8_max(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+v128_t test_u16x8_max(v128_t a, v128_t b) {
+  return wasm_u16x8_max(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_avgr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_avgr(v128_t a, v128_t b) {
+  return wasm_u16x8_avgr(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i32x4_abs(v128_t a) {
+  return wasm_i32x4_abs(a);
+}
+
+// CHECK-LABEL: @test_i32x4_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
+//
+v128_t test_i32x4_neg(v128_t a) {
+  return wasm_i32x4_neg(a);
+}
+
+// CHECK-LABEL: @test_i32x4_all_true(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_i32x4_all_true(v128_t a) {
+  return wasm_i32x4_all_true(a);
+}
+
+// CHECK-LABEL: @test_i32x4_bitmask(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_i32x4_bitmask(v128_t a) {
+  return wasm_i32x4_bitmask(a);
+}
+
+// CHECK-LABEL: @test_i32x4_shl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    ret <4 x i32> [[SHL_I]]
+//
+v128_t test_i32x4_shl(v128_t a, int32_t b) {
+  return wasm_i32x4_shl(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
+//
+v128_t test_i32x4_shr(v128_t a, int32_t b) {
+  return wasm_i32x4_shr(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
+//
+v128_t test_u32x4_shr(v128_t a, int32_t b) {
+  return wasm_u32x4_shr(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[ADD_I]]
+//
+v128_t test_i32x4_add(v128_t a, v128_t b) {
+  return wasm_i32x4_add(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
+//
+v128_t test_i32x4_sub(v128_t a, v128_t b) {
+  return wasm_i32x4_sub(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
+//
+v128_t test_i32x4_mul(v128_t a, v128_t b) {
+  return wasm_i32x4_mul(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i32x4_min(v128_t a, v128_t b) {
+  return wasm_i32x4_min(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u32x4_min(v128_t a, v128_t b) {
+  return wasm_u32x4_min(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i32x4_max(v128_t a, v128_t b) {
+  return wasm_i32x4_max(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u32x4_max(v128_t a, v128_t b) {
+  return wasm_u32x4_max(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_dot_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
+  return wasm_i32x4_dot_i16x8(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_abs(v128_t a) {
+  return wasm_i64x2_abs(a);
+}
+
+// CHECK-LABEL: @test_i64x2_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_neg(v128_t a) {
+  return wasm_i64x2_neg(a);
+}
+
+// CHECK-LABEL: @test_i64x2_all_true(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_i64x2_all_true(v128_t a) {
+  return wasm_i64x2_all_true(a);
+}
+
+// CHECK-LABEL: @test_i64x2_bitmask(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int32_t test_i64x2_bitmask(v128_t a) {
+  return wasm_i64x2_bitmask(a);
+}
+
+// CHECK-LABEL: @test_i64x2_shl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_shl(v128_t a, int32_t b) {
+  return wasm_i64x2_shl(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_shr(v128_t a, int32_t b) {
+  return wasm_i64x2_shr(a, b);
+}
+
+// CHECK-LABEL: @test_u64x2_shr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i32 [[B:%.*]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u64x2_shr(v128_t a, int32_t b) {
+  return wasm_u64x2_shr(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_add(v128_t a, v128_t b) {
+  return wasm_i64x2_add(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_sub(v128_t a, v128_t b) {
+  return wasm_i64x2_sub(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i64x2_mul(v128_t a, v128_t b) {
+  return wasm_i64x2_mul(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_abs(v128_t a) {
+  return wasm_f32x4_abs(a);
+}
+
+// CHECK-LABEL: @test_f32x4_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_f32x4_neg(v128_t a) {
+  return wasm_f32x4_neg(a);
+}
+
+// CHECK-LABEL: @test_f32x4_sqrt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_sqrt(v128_t a) {
+  return wasm_f32x4_sqrt(a);
+}
+
+// CHECK-LABEL: @test_f32x4_ceil(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_ceil(v128_t a) {
+  return wasm_f32x4_ceil(a);
+}
+
+// CHECK-LABEL: @test_f32x4_floor(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_floor(v128_t a) {
+  return wasm_f32x4_floor(a);
+}
+
+// CHECK-LABEL: @test_f32x4_trunc(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_trunc(v128_t a) {
+  return wasm_f32x4_trunc(a);
+}
+
+// CHECK-LABEL: @test_f32x4_nearest(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_nearest(v128_t a) {
+  return wasm_f32x4_nearest(a);
+}
+
+// CHECK-LABEL: @test_f32x4_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_add(v128_t a, v128_t b) {
+  return wasm_f32x4_add(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_sub(v128_t a, v128_t b) {
+  return wasm_f32x4_sub(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_mul(v128_t a, v128_t b) {
+  return wasm_f32x4_mul(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_div(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_div(v128_t a, v128_t b) {
+  return wasm_f32x4_div(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f32x4_min(v128_t a, v128_t b) {
+  return wasm_f32x4_min(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f32x4_max(v128_t a, v128_t b) {
+  return wasm_f32x4_max(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_pmin(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f32x4_pmin(v128_t a, v128_t b) {
+  return wasm_f32x4_pmin(a, b);
+}
+
+// CHECK-LABEL: @test_f32x4_pmax(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f32x4_pmax(v128_t a, v128_t b) {
+  return wasm_f32x4_pmax(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_abs(v128_t a) {
+  return wasm_f64x2_abs(a);
+}
+
+// CHECK-LABEL: @test_f64x2_neg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_f64x2_neg(v128_t a) {
+  return wasm_f64x2_neg(a);
+}
+
+// CHECK-LABEL: @test_f64x2_sqrt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_sqrt(v128_t a) {
+  return wasm_f64x2_sqrt(a);
+}
+
+// CHECK-LABEL: @test_f64x2_ceil(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_ceil(v128_t a) {
+  return wasm_f64x2_ceil(a);
+}
+
+// CHECK-LABEL: @test_f64x2_floor(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_floor(v128_t a) {
+  return wasm_f64x2_floor(a);
+}
+
+// CHECK-LABEL: @test_f64x2_trunc(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_trunc(v128_t a) {
+  return wasm_f64x2_trunc(a);
+}
+
+// CHECK-LABEL: @test_f64x2_nearest(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_nearest(v128_t a) {
+  return wasm_f64x2_nearest(a);
+}
+
+// CHECK-LABEL: @test_f64x2_add(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_add(v128_t a, v128_t b) {
+  return wasm_f64x2_add(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_sub(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_sub(v128_t a, v128_t b) {
+  return wasm_f64x2_sub(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_mul(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_mul(v128_t a, v128_t b) {
+  return wasm_f64x2_mul(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_div(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_div(v128_t a, v128_t b) {
+  return wasm_f64x2_div(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f64x2_min(v128_t a, v128_t b) {
+  return wasm_f64x2_min(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f64x2_max(v128_t a, v128_t b) {
+  return wasm_f64x2_max(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_pmin(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f64x2_pmin(v128_t a, v128_t b) {
+  return wasm_f64x2_pmin(a, b);
+}
+
+// CHECK-LABEL: @test_f64x2_pmax(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_f64x2_pmax(v128_t a, v128_t b) {
+  return wasm_f64x2_pmax(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
+  return wasm_i32x4_trunc_sat_f32x4(a);
+}
+
+// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
+  return wasm_u32x4_trunc_sat_f32x4(a);
+}
+
+// CHECK-LABEL: @test_f32x4_convert_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f32x4_convert_i32x4(v128_t a) {
+  return wasm_f32x4_convert_i32x4(a);
+}
+
+// CHECK-LABEL: @test_f32x4_convert_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f32x4_convert_u32x4(v128_t a) {
+  return wasm_f32x4_convert_u32x4(a);
+}
+
+// CHECK-LABEL: @test_f64x2_convert_low_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f64x2_convert_low_i32x4(v128_t a) {
+  return wasm_f64x2_convert_low_i32x4(a);
+}
+
+// CHECK-LABEL: @test_f64x2_convert_low_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_f64x2_convert_low_u32x4(v128_t a) {
+  return wasm_f64x2_convert_low_u32x4(a);
+}
+
+// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
+  return wasm_i32x4_trunc_sat_f64x2_zero(a);
+}
+
+// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
+  return wasm_u32x4_trunc_sat_f64x2_zero(a);
+}
+
+// CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
+  return wasm_f32x4_demote_f64x2_zero(a);
+}
+
+// CHECK-LABEL: @test_f64x2_promote_low_f32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_f64x2_promote_low_f32x4(v128_t a) {
+  return wasm_f64x2_promote_low_f32x4(a);
+}
+
+// CHECK-LABEL: @test_i8x16_shuffle(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
+  return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: @test_i16x8_shuffle(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
+  return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: @test_i32x4_shuffle(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
+  return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: @test_i64x2_shuffle(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
+  return wasm_i64x2_shuffle(a, b, 1, 0);
+}
+
+// CHECK-LABEL: @test_i8x16_swizzle(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
+  return wasm_i8x16_swizzle(a, b);
+}
+
+// CHECK-LABEL: @test_i8x16_narrow_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
+  return wasm_i8x16_narrow_i16x8(a, b);
+}
+
+// CHECK-LABEL: @test_u8x16_narrow_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
+  return wasm_u8x16_narrow_i16x8(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_narrow_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
+  return wasm_i16x8_narrow_i32x4(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_narrow_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) {
+  return wasm_u16x8_narrow_i32x4(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_extend_low_i8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i16x8_extend_low_i8x16(v128_t a) {
+  return wasm_i16x8_extend_low_i8x16(a);
+}
+
+// CHECK-LABEL: @test_i16x8_extend_high_i8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i16x8_extend_high_i8x16(v128_t a) {
+  return wasm_i16x8_extend_high_i8x16(a);
+}
+
+// CHECK-LABEL: @test_u16x8_extend_low_u8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u16x8_extend_low_u8x16(v128_t a) {
+  return wasm_u16x8_extend_low_u8x16(a);
+}
+
+// CHECK-LABEL: @test_u16x8_extend_high_u8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u16x8_extend_high_u8x16(v128_t a) {
+  return wasm_u16x8_extend_high_u8x16(a);
+}
+
+// CHECK-LABEL: @test_i32x4_extend_low_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_i32x4_extend_low_i16x8(v128_t a) {
+  return wasm_i32x4_extend_low_i16x8(a);
+}
+
+// CHECK-LABEL: @test_i32x4_extend_high_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_i32x4_extend_high_i16x8(v128_t a) {
+  return wasm_i32x4_extend_high_i16x8(a);
+}
+
+// CHECK-LABEL: @test_u32x4_extend_low_u16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_u32x4_extend_low_u16x8(v128_t a) {
+  return wasm_u32x4_extend_low_u16x8(a);
+}
+
+// CHECK-LABEL: @test_u32x4_extend_high_u16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
+//
+v128_t test_u32x4_extend_high_u16x8(v128_t a) {
+  return wasm_u32x4_extend_high_u16x8(a);
+}
+
+// CHECK-LABEL: @test_i64x2_extend_low_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i64x2_extend_low_i32x4(v128_t a) {
+  return wasm_i64x2_extend_low_i32x4(a);
+}
+
+// CHECK-LABEL: @test_i64x2_extend_high_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_i64x2_extend_high_i32x4(v128_t a) {
+  return wasm_i64x2_extend_high_i32x4(a);
+}
+
+// CHECK-LABEL: @test_u64x2_extend_low_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_u64x2_extend_low_u32x4(v128_t a) {
+  return wasm_u64x2_extend_low_u32x4(a);
+}
+
+// CHECK-LABEL: @test_u64x2_extend_high_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v128_t test_u64x2_extend_high_u32x4(v128_t a) {
+  return wasm_u64x2_extend_high_u32x4(a);
+}
+
+// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
+  return wasm_i16x8_extadd_pairwise_i8x16(a);
+}
+
+// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
+  return wasm_u16x8_extadd_pairwise_u8x16(a);
+}
+
+// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
+  return wasm_i32x4_extadd_pairwise_i16x8(a);
+}
+
+// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
+  return wasm_u32x4_extadd_pairwise_u16x8(a);
+}
+
+// CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
+  return wasm_i16x8_extmul_low_i8x16(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
+  return wasm_i16x8_extmul_high_i8x16(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
+  return wasm_u16x8_extmul_low_u8x16(a, b);
+}
+
+// CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
+  return wasm_u16x8_extmul_high_u8x16(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
+  return wasm_i32x4_extmul_low_i16x8(a, b);
+}
+
+// CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
+  return wasm_i32x4_extmul_high_i16x8(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
+  return wasm_u32x4_extmul_low_u16x8(a, b);
+}
+
+// CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
+  return wasm_u32x4_extmul_high_u16x8(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
+  return wasm_i64x2_extmul_low_i32x4(a, b);
+}
+
+// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
+  return wasm_i64x2_extmul_high_i32x4(a, b);
+}
+
+// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
+  return wasm_u64x2_extmul_low_u32x4(a, b);
+}
+
+// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
+  return wasm_u64x2_extmul_high_u32x4(a, b);
+}
+
+// CHECK-LABEL: @test_i16x8_q15mulr_sat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
+  return wasm_i16x8_q15mulr_sat(a, b);
+}