[WebAssembly] Remove clang builtins for extract_lane and replace_lane
author Thomas Lively <tlively@google.com>
Wed, 21 Jul 2021 23:11:00 +0000 (16:11 -0700)
committer Thomas Lively <tlively@google.com>
Wed, 21 Jul 2021 23:11:00 +0000 (16:11 -0700)
These builtins were added to capture the fact that the underlying Wasm
instructions return i32s and implicitly sign or zero extend the extracted lanes
in the case of the i8x16 and i16x8 variants. But we do sufficient optimizations
during code gen that these low-level details do not need to be exposed to users.

This commit replaces the use of the builtins in wasm_simd128.h with normal
target-independent vector code. As a result, we can switch the relevant
intrinsics to use functions rather than macros and can use more user-friendly
return types rather than trying to precisely expose the underlying Wasm types.
Note, however, that the generated LLVM IR is no different after this change.

Differential Revision: https://reviews.llvm.org/D106500

clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/wasm_simd128.h
clang/test/CodeGen/builtins-wasm.c

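diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 72ba833..07c368a 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def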
@@ -68,22 +68,6 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64_f64, "LLid", "nc", "nontrappi
 // SIMD builtins
 TARGET_BUILTIN(__builtin_wasm_swizzle_i8x16, "V16ScV16ScV16Sc", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i8x16, "iV16ScIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16UcIUi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i16x8, "iV8sIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i16x8, "iV8UsIUi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_i32x4, "iV4iIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_i64x2, "LLiV2LLiIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_f32x4, "fV4fIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extract_lane_f64x2, "dV2dIi", "nc", "simd128")
-
-TARGET_BUILTIN(__builtin_wasm_replace_lane_i8x16, "V16ScV16ScIii", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_i16x8, "V8sV8sIii", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_i32x4, "V4iV4iIii", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_i64x2, "V2LLiV2LLiIiLLi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_f32x4, "V4fV4fIif", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_replace_lane_f64x2, "V2dV2dIid", "nc", "simd128")
-
 TARGET_BUILTIN(__builtin_wasm_add_sat_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_add_sat_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_add_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128")
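diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ad0fc96..7a7e9a1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp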
@@ -17635,63 +17635,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
     return Builder.CreateCall(Callee, {Src, Indices});
   }
-  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
-  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
-  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
-  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
-  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
-  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
-  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
-  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
-    llvm::APSInt LaneConst =
-        *E->getArg(1)->getIntegerConstantExpr(getContext());
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
-    Value *Extract = Builder.CreateExtractElement(Vec, Lane);
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
-    case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
-      return Builder.CreateSExt(Extract, ConvertType(E->getType()));
-    case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
-    case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
-      return Builder.CreateZExt(Extract, ConvertType(E->getType()));
-    case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
-    case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
-    case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
-    case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
-      return Extract;
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-  }
-  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
-  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
-  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
-  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
-  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
-  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
-    llvm::APSInt LaneConst =
-        *E->getArg(1)->getIntegerConstantExpr(getContext());
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
-    Value *Val = EmitScalarExpr(E->getArg(2));
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
-    case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
-      llvm::Type *ElemType =
-          cast<llvm::VectorType>(ConvertType(E->getType()))->getElementType();
-      Value *Trunc = Builder.CreateTrunc(Val, ElemType);
-      return Builder.CreateInsertElement(Vec, Trunc, Lane);
-    }
-    case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
-    case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
-    case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
-    case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
-      return Builder.CreateInsertElement(Vec, Val, Lane);
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-  }
   case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
   case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
   case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
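diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index fdb32bd..bd5dbd5 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h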
@@ -396,67 +396,126 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) {
                            __a, __a, __a, __a, __a, __a, __a, __a};
 }
 
-#define wasm_i8x16_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(__a), __i))
+static __inline__ int8_t __DEFAULT_FN_ATTRS wasm_i8x16_extract_lane(v128_t __a,
+                                                                    int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__i8x16)__a)[__i];
+}
 
-#define wasm_u8x16_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_u_i8x16((__u8x16)(__a), __i))
+static __inline__ uint8_t __DEFAULT_FN_ATTRS wasm_u8x16_extract_lane(v128_t __a,
+                                                                     int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__u8x16)__a)[__i];
+}
 
-#define wasm_i8x16_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_i8x16((__i8x16)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    int8_t __b)
+    __REQUIRE_CONSTANT(__i) {
+  __i8x16 __v = (__i8x16)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) {
   return (v128_t)(__i16x8){__a, __a, __a, __a, __a, __a, __a, __a};
 }
 
-#define wasm_i16x8_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(__a), __i))
+static __inline__ int16_t __DEFAULT_FN_ATTRS wasm_i16x8_extract_lane(v128_t __a,
+                                                                     int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__i16x8)__a)[__i];
+}
 
-#define wasm_u16x8_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_u_i16x8((__u16x8)(__a), __i))
+static __inline__ uint16_t __DEFAULT_FN_ATTRS
+wasm_u16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) {
+  return ((__u16x8)__a)[__i];
+}
 
-#define wasm_i16x8_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_i16x8((__i16x8)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    int16_t __b)
+    __REQUIRE_CONSTANT(__i) {
+  __i16x8 __v = (__i16x8)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t __a) {
   return (v128_t)(__i32x4){__a, __a, __a, __a};
 }
 
-#define wasm_i32x4_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_i32x4((__i32x4)(__a), __i))
+static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_extract_lane(v128_t __a,
+                                                                     int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__i32x4)__a)[__i];
+}
 
-#define wasm_i32x4_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_i32x4((__i32x4)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    int32_t __b)
+    __REQUIRE_CONSTANT(__i) {
+  __i32x4 __v = (__i32x4)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t __a) {
   return (v128_t)(__i64x2){__a, __a};
 }
 
-#define wasm_i64x2_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_i64x2((__i64x2)(__a), __i))
+static __inline__ int64_t __DEFAULT_FN_ATTRS wasm_i64x2_extract_lane(v128_t __a,
+                                                                     int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__i64x2)__a)[__i];
+}
 
-#define wasm_i64x2_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_i64x2((__i64x2)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    int64_t __b)
+    __REQUIRE_CONSTANT(__i) {
+  __i64x2 __v = (__i64x2)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float __a) {
   return (v128_t)(__f32x4){__a, __a, __a, __a};
 }
 
-#define wasm_f32x4_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_f32x4((__f32x4)(__a), __i))
+static __inline__ float __DEFAULT_FN_ATTRS wasm_f32x4_extract_lane(v128_t __a,
+                                                                   int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__f32x4)__a)[__i];
+}
 
-#define wasm_f32x4_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_f32x4((__f32x4)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    float __b)
+    __REQUIRE_CONSTANT(__i) {
+  __f32x4 __v = (__f32x4)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double __a) {
   return (v128_t)(__f64x2){__a, __a};
 }
 
-#define wasm_f64x2_extract_lane(__a, __i)                                      \
-  (__builtin_wasm_extract_lane_f64x2((__f64x2)(__a), __i))
+static __inline__ double __DEFAULT_FN_ATTRS wasm_f64x2_extract_lane(v128_t __a,
+                                                                    int __i)
+    __REQUIRE_CONSTANT(__i) {
+  return ((__f64x2)__a)[__i];
+}
 
-#define wasm_f64x2_replace_lane(__a, __i, __b)                                 \
-  ((v128_t)__builtin_wasm_replace_lane_f64x2((__f64x2)(__a), __i, __b))
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_replace_lane(v128_t __a,
+                                                                    int __i,
+                                                                    double __b)
+    __REQUIRE_CONSTANT(__i) {
+  __f64x2 __v = (__f64x2)__a;
+  __v[__i] = __b;
+  return (v128_t)__v;
+}
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t __a,
                                                           v128_t __b) {
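diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index b94652d..ea2beed 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c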
@@ -193,99 +193,9 @@ double max_f64(double x, double y) {
   // WEBASSEMBLY-NEXT: ret
 }
 
-int extract_lane_s_i8x16(i8x16 v) {
-  return __builtin_wasm_extract_lane_s_i8x16(v, 13);
-  // MISSING-SIMD: error: '__builtin_wasm_extract_lane_s_i8x16' needs target feature simd128
-  // WEBASSEMBLY: extractelement <16 x i8> %v, i32 13
-  // WEBASSEMBLY-NEXT: sext
-  // WEBASSEMBLY-NEXT: ret
-}
-
-int extract_lane_u_i8x16(u8x16 v) {
-  return __builtin_wasm_extract_lane_u_i8x16(v, 13);
-  // WEBASSEMBLY: extractelement <16 x i8> %v, i32 13
-  // WEBASSEMBLY-NEXT: zext
-  // WEBASSEMBLY-NEXT: ret
-}
-
-int extract_lane_s_i16x8(i16x8 v) {
-  return __builtin_wasm_extract_lane_s_i16x8(v, 7);
-  // WEBASSEMBLY: extractelement <8 x i16> %v, i32 7
-  // WEBASSEMBLY-NEXT: sext
-  // WEBASSEMBLY-NEXT: ret
-}
-
-int extract_lane_u_i16x8(u16x8 v) {
-  return __builtin_wasm_extract_lane_u_i16x8(v, 7);
-  // WEBASSEMBLY: extractelement <8 x i16> %v, i32 7
-  // WEBASSEMBLY-NEXT: zext
-  // WEBASSEMBLY-NEXT: ret
-}
-
-int extract_lane_i32x4(i32x4 v) {
-  return __builtin_wasm_extract_lane_i32x4(v, 3);
-  // WEBASSEMBLY: extractelement <4 x i32> %v, i32 3
-  // WEBASSEMBLY-NEXT: ret
-}
-
-long long extract_lane_i64x2(i64x2 v) {
-  return __builtin_wasm_extract_lane_i64x2(v, 1);
-  // WEBASSEMBLY: extractelement <2 x i64> %v, i32 1
-  // WEBASSEMBLY-NEXT: ret
-}
-
-float extract_lane_f32x4(f32x4 v) {
-  return __builtin_wasm_extract_lane_f32x4(v, 3);
-  // WEBASSEMBLY: extractelement <4 x float> %v, i32 3
-  // WEBASSEMBLY-NEXT: ret
-}
-
-double extract_lane_f64x2(f64x2 v) {
-  return __builtin_wasm_extract_lane_f64x2(v, 1);
-  // WEBASSEMBLY: extractelement <2 x double> %v, i32 1
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i8x16 replace_lane_i8x16(i8x16 v, int x) {
-  return __builtin_wasm_replace_lane_i8x16(v, 13, x);
-  // WEBASSEMBLY: trunc i32 %x to i8
-  // WEBASSEMBLY-NEXT: insertelement <16 x i8> %v, i8 %{{.*}}, i32 13
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i16x8 replace_lane_i16x8(i16x8 v, int x) {
-  return __builtin_wasm_replace_lane_i16x8(v, 7, x);
-  // WEBASSEMBLY: trunc i32 %x to i16
-  // WEBASSEMBLY-NEXT: insertelement <8 x i16> %v, i16 %{{.*}}, i32 7
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i32x4 replace_lane_i32x4(i32x4 v, int x) {
-  return __builtin_wasm_replace_lane_i32x4(v, 3, x);
-  // WEBASSEMBLY: insertelement <4 x i32> %v, i32 %x, i32 3
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i64x2 replace_lane_i64x2(i64x2 v, long long x) {
-  return __builtin_wasm_replace_lane_i64x2(v, 1, x);
-  // WEBASSEMBLY: insertelement <2 x i64> %v, i64 %x, i32 1
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f32x4 replace_lane_f32x4(f32x4 v, float x) {
-  return __builtin_wasm_replace_lane_f32x4(v, 3, x);
-  // WEBASSEMBLY: insertelement <4 x float> %v, float %x, i32 3
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f64x2 replace_lane_f64x2(f64x2 v, double x) {
-  return __builtin_wasm_replace_lane_f64x2(v, 1, x);
-  // WEBASSEMBLY: insertelement <2 x double> %v, double %x, i32 1
-  // WEBASSEMBLY-NEXT: ret
-}
-
 i8x16 add_sat_s_i8x16(i8x16 x, i8x16 y) {
   return __builtin_wasm_add_sat_s_i8x16(x, y);
+  // MISSING-SIMD: error: '__builtin_wasm_add_sat_s_i8x16' needs target feature simd128
   // WEBASSEMBLY: call <16 x i8> @llvm.sadd.sat.v16i8(
   // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
   // WEBASSEMBLY-NEXT: ret