[WebAssembly] Add wasm_simd128.h intrinsics for relaxed SIMD
author Thomas Lively <tlively@google.com>
Thu, 18 May 2023 15:24:58 +0000 (08:24 -0700)
committer Thomas Lively <tlively@google.com>
Thu, 18 May 2023 15:24:58 +0000 (08:24 -0700)
Add user-friendly intrinsic functions for all relaxed SIMD instructions
alongside the existing SIMD128 intrinsic functions in wasm_simd128.h. Test that
the new intrinsics lower to the expected instructions in the existing
cross-project-tests test file.

Reviewed By: aheejin, sbc100

Differential Revision: https://reviews.llvm.org/D150833

clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/Headers/wasm_simd128.h
cross-project-tests/intrinsic-header-tests/wasm_simd128.c

index ddd8bc9..de89738 100644 (file)
@@ -161,7 +161,7 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4, "V4iV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
 
-// Relaxed SIMD builtins (experimental)
+// Relaxed SIMD builtins
 TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
 TARGET_BUILTIN(__builtin_wasm_relaxed_nmadd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
 TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
index a099ab5..2327bec 100644 (file)
@@ -1760,6 +1760,126 @@ wasm_u64x2_load_32x2(const void *__mem) {
   __DEPRECATED_WASM_MACRO("wasm_v64x2_shuffle", "wasm_i64x2_shuffle")          \
   wasm_i64x2_shuffle(__a, __b, __c0, __c1)
 
+// Relaxed SIMD intrinsics
+
+#define __RELAXED_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("relaxed-simd"),   \
+                 __min_vector_width__(128)))
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_f32x4_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) {
+  return (v128_t)__builtin_wasm_relaxed_madd_f32x4((__f32x4)__a, (__f32x4)__b,
+                                                   (__f32x4)__c);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_f32x4_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) {
+  return (v128_t)__builtin_wasm_relaxed_nmadd_f32x4((__f32x4)__a, (__f32x4)__b,
+                                                    (__f32x4)__c);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_f64x2_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) {
+  return (v128_t)__builtin_wasm_relaxed_madd_f64x2((__f64x2)__a, (__f64x2)__b,
+                                                   (__f64x2)__c);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_f64x2_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) {
+  return (v128_t)__builtin_wasm_relaxed_nmadd_f64x2((__f64x2)__a, (__f64x2)__b,
+                                                    (__f64x2)__c);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i8x16_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
+  return (v128_t)__builtin_wasm_relaxed_laneselect_i8x16(
+      (__i8x16)__a, (__i8x16)__b, (__i8x16)__m);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i16x8_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
+  return (v128_t)__builtin_wasm_relaxed_laneselect_i16x8(
+      (__i16x8)__a, (__i16x8)__b, (__i16x8)__m);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i32x4_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
+  return (v128_t)__builtin_wasm_relaxed_laneselect_i32x4(
+      (__i32x4)__a, (__i32x4)__b, (__i32x4)__m);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i64x2_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
+  return (v128_t)__builtin_wasm_relaxed_laneselect_i64x2(
+      (__i64x2)__a, (__i64x2)__b, (__i64x2)__m);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i8x16_relaxed_swizzle(v128_t __a, v128_t __s) {
+  return (v128_t)__builtin_wasm_relaxed_swizzle_i8x16((__i8x16)__a,
+                                                      (__i8x16)__s);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_min(v128_t __a,
+                                                                   v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_min_f32x4((__f32x4)__a, (__f32x4)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_max(v128_t __a,
+                                                                   v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_max_f32x4((__f32x4)__a, (__f32x4)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_min(v128_t __a,
+                                                                   v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_min_f64x2((__f64x2)__a, (__f64x2)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_max(v128_t __a,
+                                                                   v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_max_f64x2((__f64x2)__a, (__f64x2)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i32x4_relaxed_trunc_f32x4(v128_t __a) {
+  return (v128_t)__builtin_wasm_relaxed_trunc_s_i32x4_f32x4((__f32x4)__a);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_u32x4_relaxed_trunc_f32x4(v128_t __a) {
+  return (v128_t)__builtin_wasm_relaxed_trunc_u_i32x4_f32x4((__f32x4)__a);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i32x4_relaxed_trunc_f64x2_zero(v128_t __a) {
+  return (v128_t)__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2((__f64x2)__a);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_u32x4_relaxed_trunc_f64x2_zero(v128_t __a) {
+  return (v128_t)__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2((__f64x2)__a);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i16x8_relaxed_q15mulr(v128_t __a, v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_q15mulr_s_i16x8((__i16x8)__a,
+                                                        (__i16x8)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i16x8_relaxed_dot_i8x16_i7x16(v128_t __a, v128_t __b) {
+  return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8((__i8x16)__a,
+                                                                (__i8x16)__b);
+}
+
+static __inline__ v128_t __RELAXED_FN_ATTRS
+wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) {
+  return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4(
+      (__i8x16)__a, (__i8x16)__b, (__i32x4)__c);
+}
+
+// Deprecated intrinsics
+
 static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle")
 wasm_v8x16_swizzle(v128_t __a, v128_t __b) {
   return wasm_i8x16_swizzle(__a, __b);
index 27426ad..fb15e01 100644 (file)
@@ -1,7 +1,8 @@
 // REQUIRES: webassembly-registered-target
 // expected-no-diagnostics
 
-// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown -msimd128 -Wcast-qual -Werror | FileCheck %s
+// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \
+// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s
 
 #include <wasm_simd128.h>
 
@@ -1264,3 +1265,123 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
 v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
   return wasm_i16x8_q15mulr_sat(a, b);
 }
+
+// CHECK-LABEL: test_f32x4_relaxed_madd:
+// CHECK: f32x4.relaxed_madd{{$}}
+v128_t test_f32x4_relaxed_madd(v128_t a, v128_t b, v128_t c) {
+  return wasm_f32x4_relaxed_madd(a, b, c);
+}
+
+// CHECK-LABEL: test_f32x4_relaxed_nmadd:
+// CHECK: f32x4.relaxed_nmadd{{$}}
+v128_t test_f32x4_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
+  return wasm_f32x4_relaxed_nmadd(a, b, c);
+}
+
+// CHECK-LABEL: test_f64x2_relaxed_madd:
+// CHECK: f64x2.relaxed_madd{{$}}
+v128_t test_f64x2_relaxed_madd(v128_t a, v128_t b, v128_t c) {
+  return wasm_f64x2_relaxed_madd(a, b, c);
+}
+
+// CHECK-LABEL: test_f64x2_relaxed_nmadd:
+// CHECK: f64x2.relaxed_nmadd{{$}}
+v128_t test_f64x2_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
+  return wasm_f64x2_relaxed_nmadd(a, b, c);
+}
+
+// CHECK-LABEL: test_i8x16_relaxed_laneselect:
+// CHECK: i8x16.relaxed_laneselect{{$}}
+v128_t test_i8x16_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
+  return wasm_i8x16_relaxed_laneselect(a, b, m);
+}
+
+// CHECK-LABEL: test_i16x8_relaxed_laneselect:
+// CHECK: i16x8.relaxed_laneselect{{$}}
+v128_t test_i16x8_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
+  return wasm_i16x8_relaxed_laneselect(a, b, m);
+}
+
+// CHECK-LABEL: test_i32x4_relaxed_laneselect:
+// CHECK: i32x4.relaxed_laneselect{{$}}
+v128_t test_i32x4_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
+  return wasm_i32x4_relaxed_laneselect(a, b, m);
+}
+
+// CHECK-LABEL: test_i64x2_relaxed_laneselect:
+// CHECK: i64x2.relaxed_laneselect{{$}}
+v128_t test_i64x2_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
+  return wasm_i64x2_relaxed_laneselect(a, b, m);
+}
+
+// CHECK-LABEL: test_i8x16_relaxed_swizzle:
+// CHECK: i8x16.relaxed_swizzle{{$}}
+v128_t test_i8x16_relaxed_swizzle(v128_t a, v128_t s) {
+  return wasm_i8x16_relaxed_swizzle(a, s);
+}
+
+// CHECK-LABEL: test_f32x4_relaxed_min:
+// CHECK: f32x4.relaxed_min{{$}}
+v128_t test_f32x4_relaxed_min(v128_t a, v128_t b) {
+  return wasm_f32x4_relaxed_min(a, b);
+}
+
+// CHECK-LABEL: test_f32x4_relaxed_max:
+// CHECK: f32x4.relaxed_max{{$}}
+v128_t test_f32x4_relaxed_max(v128_t a, v128_t b) {
+  return wasm_f32x4_relaxed_max(a, b);
+}
+
+// CHECK-LABEL: test_f64x2_relaxed_min:
+// CHECK: f64x2.relaxed_min{{$}}
+v128_t test_f64x2_relaxed_min(v128_t a, v128_t b) {
+  return wasm_f64x2_relaxed_min(a, b);
+}
+
+// CHECK-LABEL: test_f64x2_relaxed_max:
+// CHECK: f64x2.relaxed_max{{$}}
+v128_t test_f64x2_relaxed_max(v128_t a, v128_t b) {
+  return wasm_f64x2_relaxed_max(a, b);
+}
+
+// CHECK-LABEL: test_i32x4_relaxed_trunc_f32x4:
+// CHECK: i32x4.relaxed_trunc_f32x4_s{{$}}
+v128_t test_i32x4_relaxed_trunc_f32x4(v128_t a) {
+  return wasm_i32x4_relaxed_trunc_f32x4(a);
+}
+
+// CHECK-LABEL: test_u32x4_relaxed_trunc_f32x4:
+// CHECK: i32x4.relaxed_trunc_f32x4_u{{$}}
+v128_t test_u32x4_relaxed_trunc_f32x4(v128_t a) {
+  return wasm_u32x4_relaxed_trunc_f32x4(a);
+}
+
+// CHECK-LABEL: test_i32x4_relaxed_trunc_f64x2_zero:
+// CHECK: i32x4.relaxed_trunc_f64x2_s_zero{{$}}
+v128_t test_i32x4_relaxed_trunc_f64x2_zero(v128_t a) {
+  return wasm_i32x4_relaxed_trunc_f64x2_zero(a);
+}
+
+// CHECK-LABEL: test_u32x4_relaxed_trunc_f64x2_zero:
+// CHECK: i32x4.relaxed_trunc_f64x2_u_zero{{$}}
+v128_t test_u32x4_relaxed_trunc_f64x2_zero(v128_t a) {
+  return wasm_u32x4_relaxed_trunc_f64x2_zero(a);
+}
+
+// CHECK-LABEL: test_i16x8_relaxed_q15mulr:
+// CHECK: i16x8.relaxed_q15mulr_s{{$}}
+v128_t test_i16x8_relaxed_q15mulr(v128_t a, v128_t b) {
+  return wasm_i16x8_relaxed_q15mulr(a, b);
+}
+
+// CHECK-LABEL: test_i16x8_relaxed_dot_i8x16_i7x16:
+// CHECK: i16x8.relaxed_dot_i8x16_i7x16_s{{$}}
+v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) {
+  return wasm_i16x8_relaxed_dot_i8x16_i7x16(a, b);
+}
+
+// CHECK-LABEL: test_i32x4_relaxed_dot_i8x16_i7x16_add:
+// CHECK: i32x4.relaxed_dot_i8x16_i7x16_add_s{{$}}
+v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) {
+  return wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c);
+}