TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_s_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_s_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_u_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_u_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_s_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_s_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_u_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_u_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-
#undef BUILTIN
#undef TARGET_BUILTIN
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
return Builder.CreateCall(Callee, {Low, High});
}
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_low_signed;
- break;
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_high_signed;
- break;
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_low_unsigned;
- break;
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_high_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee =
- CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
- return Builder.CreateCall(Callee, Vec);
- }
case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
Value *Ops[18];
size_t OpIdx = 0;
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
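+// Half-width vector types used as intermediate operands of
+// __builtin_convertvector in the widening intrinsics below.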
+typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef unsigned char __u8x8
+ __attribute__((__vector_size__(8), __aligned__(8)));
+typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef unsigned short __u16x4
+ __attribute__((__vector_size__(8), __aligned__(8)));
+
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
__min_vector_width__(128)))
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_low_i8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_s_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i8x8){((__i8x16)__a)[0], ((__i8x16)__a)[1], ((__i8x16)__a)[2],
+ ((__i8x16)__a)[3], ((__i8x16)__a)[4], ((__i8x16)__a)[5],
+ ((__i8x16)__a)[6], ((__i8x16)__a)[7]},
+ __i16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_high_i8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_s_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i8x8){((__i8x16)__a)[8], ((__i8x16)__a)[9], ((__i8x16)__a)[10],
+ ((__i8x16)__a)[11], ((__i8x16)__a)[12], ((__i8x16)__a)[13],
+ ((__i8x16)__a)[14], ((__i8x16)__a)[15]},
+ __i16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_low_u8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_u_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u8x8){((__u8x16)__a)[0], ((__u8x16)__a)[1], ((__u8x16)__a)[2],
+ ((__u8x16)__a)[3], ((__u8x16)__a)[4], ((__u8x16)__a)[5],
+ ((__u8x16)__a)[6], ((__u8x16)__a)[7]},
+ __u16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_high_u8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_u_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u8x8){((__u8x16)__a)[8], ((__u8x16)__a)[9], ((__u8x16)__a)[10],
+ ((__u8x16)__a)[11], ((__u8x16)__a)[12], ((__u8x16)__a)[13],
+ ((__u8x16)__a)[14], ((__u8x16)__a)[15]},
+ __u16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_low_i16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_s_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i16x4){((__i16x8)__a)[0], ((__i16x8)__a)[1], ((__i16x8)__a)[2],
+ ((__i16x8)__a)[3]},
+ __i32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_high_i16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_s_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i16x4){((__i16x8)__a)[4], ((__i16x8)__a)[5], ((__i16x8)__a)[6],
+ ((__i16x8)__a)[7]},
+ __i32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_low_u16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_u_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u16x4){((__u16x8)__a)[0], ((__u16x8)__a)[1], ((__u16x8)__a)[2],
+ ((__u16x8)__a)[3]},
+ __u32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_high_u16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_u_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u16x4){((__u16x8)__a)[4], ((__u16x8)__a)[5], ((__u16x8)__a)[6],
+ ((__u16x8)__a)[7]},
+ __u32x4);
}
// Undefine helper macros
// WEBASSEMBLY: ret
}
-i16x8 widen_low_s_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_low_s_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_high_s_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_high_s_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_low_u_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_low_u_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_high_u_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_high_u_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_low_s_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_low_s_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_high_s_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_high_s_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_low_u_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_low_u_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_high_u_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_high_u_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
i8x16 swizzle_v8x16(i8x16 x, i8x16 y) {
return __builtin_wasm_swizzle_v8x16(x, y);
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_low_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_high_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_low_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_high_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
// TODO: Replace these intrinsics with normal ISel patterns
def int_wasm_pmin :
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(WIDEN_LOW_S)
+HANDLE_NODETYPE(WIDEN_LOW_U)
+HANDLE_NODETYPE(WIDEN_HIGH_S)
+HANDLE_NODETYPE(WIDEN_HIGH_U)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
// Hoist bitcasts out of shuffles
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ // Combine extends of extract_subvectors into widening ops
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+
// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
return DAG.getBitcast(DstType, NewShuffle);
}
+static SDValue performVectorWidenCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ assert(N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND);
+
+ // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
+ // possible before the extract_subvector can be expanded.
+ auto Extract = N->getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+ auto Source = Extract.getOperand(0);
+ auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (IndexNode == nullptr)
+ return SDValue();
+ auto Index = IndexNode->getZExtValue();
+
+ // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
+ // subvector is the low or high half of its source.
+ EVT ResVT = N->getValueType(0);
+ if (ResVT == MVT::v8i16) {
+ if (Extract.getValueType() != MVT::v8i8 ||
+ Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
+ return SDValue();
+ } else if (ResVT == MVT::v4i32) {
+ if (Extract.getValueType() != MVT::v4i16 ||
+ Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
+ bool IsLow = Index == 0;
+
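+  // Map the extension kind and the extracted half to the matching target node.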
+ unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
+ : WebAssemblyISD::WIDEN_HIGH_S)
+ : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
+ : WebAssemblyISD::WIDEN_HIGH_U);
+
+ return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
return SDValue();
case ISD::VECTOR_SHUFFLE:
return performVECTOR_SHUFFLECombine(N, DCI);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return performVectorWidenCombine(N, DCI);
}
}
(fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
// Widening operations
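+// Target DAG nodes produced by the sign/zero-extend combine in
+// WebAssemblyISelLowering.cpp; they replace the former int_wasm_widen_*
+// intrinsics in the patterns below.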
+def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def widen_low_s : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
+def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
+def widen_low_u : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
+def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;
+
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
bits<32> baseInst> {
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_low_s,
vec#".widen_low_"#arg#"_s", baseInst>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_high_s,
vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_low_u,
vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_high_u,
vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
}
ret <8 x i16> %a
}
-; CHECK-LABEL: widen_low_signed_v8i16:
-; SIMD128-NEXT: .functype widen_low_signed_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_low_i8x16_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_low_signed_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_high_signed_v8i16:
-; SIMD128-NEXT: .functype widen_high_signed_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_high_i8x16_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_high_signed_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_low_unsigned_v8i16:
-; SIMD128-NEXT: .functype widen_low_unsigned_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_low_i8x16_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_low_unsigned_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_high_unsigned_v8i16:
-; SIMD128-NEXT: .functype widen_high_unsigned_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_high_i8x16_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_high_unsigned_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
; ==============================================================================
; 4 x i32
; ==============================================================================
ret <4 x i32> %a
}
-; CHECK-LABEL: widen_low_signed_v4i32:
-; SIMD128-NEXT: .functype widen_low_signed_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_low_i16x8_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_low_signed_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_high_signed_v4i32:
-; SIMD128-NEXT: .functype widen_high_signed_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_high_i16x8_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_high_signed_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_low_unsigned_v4i32:
-; SIMD128-NEXT: .functype widen_low_unsigned_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_low_i16x8_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_low_unsigned_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_high_unsigned_v4i32:
-; SIMD128-NEXT: .functype widen_high_unsigned_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_high_i16x8_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_high_unsigned_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
; ==============================================================================
; 2 x i64
; ==============================================================================
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+;; Test that SIMD widening operations can be successfully selected
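+;; Each test extracts the low or high half of a vector with a shufflevector
+;; and extends it; the combined pattern should select to a single widen
+;; instruction.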
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+define <8 x i16> @widen_low_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_low_i8x16_s:
+; CHECK: .functype widen_low_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_low_i8x16_s
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %widened = sext <8 x i8> %low to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_low_i8x16_u(<16 x i8> %v) {
+; CHECK-LABEL: widen_low_i8x16_u:
+; CHECK: .functype widen_low_i8x16_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_low_i8x16_u
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %widened = zext <8 x i8> %low to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_high_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_high_i8x16_s:
+; CHECK: .functype widen_high_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_high_i8x16_s
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %widened = sext <8 x i8> %high to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_high_i8x16_u(<16 x i8> %v) {
+; CHECK-LABEL: widen_high_i8x16_u:
+; CHECK: .functype widen_high_i8x16_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_high_i8x16_u
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %widened = zext <8 x i8> %high to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <4 x i32> @widen_low_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_low_i16x8_s:
+; CHECK: .functype widen_low_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_low_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %widened = sext <4 x i16> %low to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_low_i16x8_u(<8 x i16> %v) {
+; CHECK-LABEL: widen_low_i16x8_u:
+; CHECK: .functype widen_low_i16x8_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_low_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %widened = zext <4 x i16> %low to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_high_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_high_i16x8_s:
+; CHECK: .functype widen_high_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_high_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
+    <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %widened = sext <4 x i16> %high to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_high_i16x8_u(<8 x i16> %v) {
+; CHECK-LABEL: widen_high_i16x8_u:
+; CHECK: .functype widen_high_i16x8_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_high_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
+    <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %widened = zext <4 x i16> %high to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+;; Also test that similar patterns with offsets not corresponding to
+;; the low or high half are correctly expanded.
+
+define <8 x i16> @widen_lowish_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_lowish_i8x16_s:
+; CHECK: .functype widen_lowish_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 1
+; CHECK-NEXT: i16x8.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 2
+; CHECK-NEXT: i16x8.replace_lane 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 3
+; CHECK-NEXT: i16x8.replace_lane 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 4
+; CHECK-NEXT: i16x8.replace_lane 3
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 5
+; CHECK-NEXT: i16x8.replace_lane 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 6
+; CHECK-NEXT: i16x8.replace_lane 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 7
+; CHECK-NEXT: i16x8.replace_lane 6
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 8
+; CHECK-NEXT: i16x8.replace_lane 7
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i16x8.shl
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i16x8.shr_s
+; CHECK-NEXT: # fallthrough-return
+ %lowish = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %widened = sext <8 x i8> %lowish to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <4 x i32> @widen_lowish_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_lowish_i16x8_s:
+; CHECK: .functype widen_lowish_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 1
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 2
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 3
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 4
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32x4.shl
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32x4.shr_s
+; CHECK-NEXT: # fallthrough-return
+ %lowish = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %widened = sext <4 x i16> %lowish to <4 x i32>
+ ret <4 x i32> %widened
+}