[WebAssembly] Prototype extending multiplication SIMD instructions

author Thomas Lively <tlively@google.com>

Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)

committer Thomas Lively <tlively@google.com>

Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)
author Thomas Lively <tlively@google.com>
Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)
committer Thomas Lively <tlively@google.com>
Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def

index f84ce92..1ee9781 100644 (file)
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -118,6 +118,21 @@ TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")
  
  TARGET_BUILTIN(__builtin_wasm_q15mulr_saturate_s_i8x16, "V8sV8sV8s", "nc", "simd128")
  
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
+
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
+
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
+
  TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")
  TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")
  
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp

index ae3645d..e389643 100644 (file)
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -16600,6 +16600,49 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
          CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed);
      return Builder.CreateCall(Callee, {LHS, RHS});
    }
+  case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
+  case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
+  case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
+  case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
+  case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
+  case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
+  case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
+    case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
+    case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
+      IntNo = Intrinsic::wasm_extmul_low_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
+    case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
+    case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
+      IntNo = Intrinsic::wasm_extmul_low_unsigned;
+      break;
+    case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
+    case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
+    case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
+      IntNo = Intrinsic::wasm_extmul_high_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
+    case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
+    case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2:
+      IntNo = Intrinsic::wasm_extmul_high_unsigned;
+      break;
+    default:
+      llvm_unreachable("unexptected builtin ID");
+    }
+
+    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
    case WebAssembly::BI__builtin_wasm_bitselect: {
      Value *V1 = EmitScalarExpr(E->getArg(0));
      Value *V2 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c

index 4b88270..be61e9e 100644 (file)
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -525,6 +525,90 @@ i16x8 q15mulr_saturate_s_i16x8(i16x8 x, i16x8 y) {
    // WEBASSEMBLY-NEXT: ret
  }
  
+i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) {
+  return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y);
+  // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
+  // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) {
+  return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y);
+  // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
+  // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) {
+  return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y);
+  // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
+  // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) {
+  return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y);
+  // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
+  // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) {
+  return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y);
+  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
+  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) {
+  return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y);
+  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
+  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) {
+  return __builtin_wasm_extmul_low_i16x8_u_i32x4(x, y);
+  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
+  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) {
+  return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y);
+  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
+  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) {
+  return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y);
+  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
+  // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) {
+  return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y);
+  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
+  // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) {
+  return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y);
+  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
+  // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) {
+  return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y);
+  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
+  // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
  i32x4 dot_i16x8_s(i16x8 x, i16x8 y) {
    return __builtin_wasm_dot_s_i32x4_i16x8(x, y);
    // WEBASSEMBLY: call <4 x i32> @llvm.wasm.dot(<8 x i16> %x, <8 x i16> %y)
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td

index 7a701ec..5c503a4 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -259,6 +259,23 @@ def int_wasm_store64_lane :
  def int_wasm_popcnt :
    Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>;
  
+def int_wasm_extmul_low_signed :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_high_signed :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_low_unsigned :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_extmul_high_unsigned :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+
  //===----------------------------------------------------------------------===//
  // Thread-local storage intrinsics
  //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp

index 529674f..3e8cce4 100644 (file)
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -62,12 +62,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
    uint64_t Start = OS.tell();
  
    uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
-  if (Binary <= UINT8_MAX) {
+  if (Binary < (1 << 8)) {
      OS << uint8_t(Binary);
-  } else {
-    assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
+  } else if (Binary < (1 << 16)) {
      OS << uint8_t(Binary >> 8);
      encodeULEB128(uint8_t(Binary), OS);
+  } else if (Binary < (1 << 24)) {
+    OS << uint8_t(Binary >> 16);
+    encodeULEB128(uint16_t(Binary), OS);
+  } else {
+    llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported");
    }
  
    // For br_table instructions, encode the size of the table. In the MCInst,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

index 5d72def..7e78b6c 100644 (file)
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -16,7 +16,9 @@ multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
                    list<dag> pattern_r, string asmstr_r = "",
                    string asmstr_s = "", bits<32> simdop = -1> {
    defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
-              !or(0xfd00, !and(0xff, simdop))>,
+              !if(!ge(simdop, 0x100),
+                  !or(0xfd0000, !and(0xffff, simdop)),
+                  !or(0xfd00, !and(0xff, simdop)))>,
              Requires<[HasSIMD128]>;
  }
  
@@ -935,6 +937,57 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
                    "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
                    186>;
  
+// Extending multiplication: extmul_{low,high}_P, extmul_high
+multiclass SIMDExtBinary<ValueType vec_t, ValueType arg_t, string vec,
+                         SDNode node, string name, bits<32> simdop> {
+  defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+                        (outs), (ins),
+                        [(set (vec_t V128:$dst),
+                          (node (arg_t V128:$lhs), (arg_t V128:$rhs))
+                        )],
+                        vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
+                        simdop>;
+}
+
+defm EXTMUL_LOW_S :
+  SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_signed,
+                "extmul_low_i8x16_s", 154>;
+defm EXTMUL_HIGH_S :
+  SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_signed,
+                "extmul_high_i8x16_s", 157>;
+defm EXTMUL_LOW_U :
+  SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_low_unsigned,
+                "extmul_low_i8x16_u", 158>;
+defm EXTMUL_HIGH_U :
+  SIMDExtBinary<v8i16, v16i8, "i16x8", int_wasm_extmul_high_unsigned,
+                "extmul_high_i8x16_u", 159>;
+
+defm EXTMUL_LOW_S :
+  SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_signed,
+                "extmul_low_i16x8_s", 187>;
+defm EXTMUL_HIGH_S :
+  SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_signed,
+                "extmul_high_i16x8_s", 189>;
+defm EXTMUL_LOW_U :
+  SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_low_unsigned,
+                "extmul_low_i16x8_u", 190>;
+defm EXTMUL_HIGH_U :
+  SIMDExtBinary<v4i32, v8i16, "i32x4", int_wasm_extmul_high_unsigned,
+                "extmul_high_i16x8_u", 191>;
+
+defm EXTMUL_LOW_S :
+  SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_signed,
+                "extmul_low_i32x4_s", 210>;
+defm EXTMUL_HIGH_S :
+  SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_signed,
+                "extmul_high_i32x4_s", 211>;
+defm EXTMUL_LOW_U :
+  SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_low_unsigned,
+               "extmul_low_i32x4_u", 214>;
+defm EXTMUL_HIGH_U :
+  SIMDExtBinary<v2i64, v4i32, "i64x2", int_wasm_extmul_high_unsigned,
+                "extmul_high_i32x4_u", 215>;
+
  //===----------------------------------------------------------------------===//
  // Floating-point unary arithmetic
  //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

index 7c0f43d..b4edbd3 100644 (file)
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -249,6 +249,54 @@ define <8 x i16> @q15mulr_sat_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
    ret <8 x i16> %a
  }
  
+; CHECK-LABEL: extmul_low_s_v8i16:
+; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8>, <16 x i8>)
+define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %x, <16 x i8> %y) {
+  %a = call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
+    <16 x i8> %x, <16 x i8> %y
+  )
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v8i16:
+; SIMD128-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8>, <16 x i8>)
+define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %x, <16 x i8> %y) {
+  %a = call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
+    <16 x i8> %x, <16 x i8> %y
+  )
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v8i16:
+; SIMD128-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8>, <16 x i8>)
+define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %x, <16 x i8> %y) {
+  %a = call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
+    <16 x i8> %x, <16 x i8> %y
+  )
+  ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v8i16:
+; SIMD128-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8>, <16 x i8>)
+define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %x, <16 x i8> %y) {
+  %a = call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
+    <16 x i8> %x, <16 x i8> %y
+  )
+  ret <8 x i16> %a
+}
+
  ; CHECK-LABEL: any_v8i16:
  ; SIMD128-NEXT: .functype any_v8i16 (v128) -> (i32){{$}}
  ; SIMD128-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
@@ -328,6 +376,55 @@ define <4 x i32> @dot(<8 x i16> %x, <8 x i16> %y) {
    ret <4 x i32> %a
  }
  
+
+; CHECK-LABEL: extmul_low_s_v4i32:
+; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16>, <8 x i16>)
+define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %x, <8 x i16> %y) {
+  %a = call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
+    <8 x i16> %x, <8 x i16> %y
+  )
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v4i32:
+; SIMD128-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16>, <8 x i16>)
+define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %x, <8 x i16> %y) {
+  %a = call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
+    <8 x i16> %x, <8 x i16> %y
+  )
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v4i32:
+; SIMD128-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16>, <8 x i16>)
+define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %x, <8 x i16> %y) {
+  %a = call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
+    <8 x i16> %x, <8 x i16> %y
+  )
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v4i32:
+; SIMD128-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16>, <8 x i16>)
+define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %x, <8 x i16> %y) {
+  %a = call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
+    <8 x i16> %x, <8 x i16> %y
+  )
+  ret <4 x i32> %a
+}
+
  ; CHECK-LABEL: any_v4i32:
  ; SIMD128-NEXT: .functype any_v4i32 (v128) -> (i32){{$}}
  ; SIMD128-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}}
@@ -395,6 +492,54 @@ define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) {
  ; ==============================================================================
  ; 2 x i64
  ; ==============================================================================
+; CHECK-LABEL: extmul_low_s_v2i64:
+; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32>, <4 x i32>)
+define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %x, <4 x i32> %y) {
+  %a = call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
+    <4 x i32> %x, <4 x i32> %y
+  )
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v2i64:
+; SIMD128-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32>, <4 x i32>)
+define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %x, <4 x i32> %y) {
+  %a = call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
+    <4 x i32> %x, <4 x i32> %y
+  )
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v2i64:
+; SIMD128-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32>, <4 x i32>)
+define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %x, <4 x i32> %y) {
+  %a = call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
+    <4 x i32> %x, <4 x i32> %y
+  )
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v2i64:
+; SIMD128-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32>, <4 x i32>)
+define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) {
+  %a = call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
+    <4 x i32> %x, <4 x i32> %y
+  )
+  ret <2 x i64> %a
+}
+
  ; CHECK-LABEL: any_v2i64:
  ; SIMD128-NEXT: .functype any_v2i64 (v128) -> (i32){{$}}
  ; SIMD128-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s

index 2b737b4..a4908a0 100644 (file)
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -658,4 +658,40 @@ main:
      # CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01]
      f64x2.qfms
  
+    # CHECK: i16x8.extmul_low_i8x16_s # encoding: [0xfd,0x9a,0x01]
+    i16x8.extmul_low_i8x16_s
+
+    # CHECK: i16x8.extmul_high_i8x16_s # encoding: [0xfd,0x9d,0x01]
+    i16x8.extmul_high_i8x16_s
+
+    # CHECK: i16x8.extmul_low_i8x16_u # encoding: [0xfd,0x9e,0x01]
+    i16x8.extmul_low_i8x16_u
+
+    # CHECK: i16x8.extmul_high_i8x16_u # encoding: [0xfd,0x9f,0x01]
+    i16x8.extmul_high_i8x16_u
+
+    # CHECK: i32x4.extmul_low_i16x8_s # encoding: [0xfd,0xbb,0x01]
+    i32x4.extmul_low_i16x8_s
+
+    # CHECK: i32x4.extmul_high_i16x8_s # encoding: [0xfd,0xbd,0x01]
+    i32x4.extmul_high_i16x8_s
+
+    # CHECK: i32x4.extmul_low_i16x8_u # encoding: [0xfd,0xbe,0x01]
+    i32x4.extmul_low_i16x8_u
+
+    # CHECK: i32x4.extmul_high_i16x8_u # encoding: [0xfd,0xbf,0x01]
+    i32x4.extmul_high_i16x8_u
+
+    # CHECK: i64x2.extmul_low_i32x4_s # encoding: [0xfd,0xd2,0x01]
+    i64x2.extmul_low_i32x4_s
+
+    # CHECK: i64x2.extmul_high_i32x4_s # encoding: [0xfd,0xd3,0x01]
+    i64x2.extmul_high_i32x4_s
+
+    # CHECK: i64x2.extmul_low_i32x4_u # encoding: [0xfd,0xd6,0x01]
+    i64x2.extmul_low_i32x4_u
+
+    # CHECK: i64x2.extmul_high_i32x4_u # encoding: [0xfd,0xd7,0x01]
+    i64x2.extmul_high_i32x4_u
+
      end_function
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp

index 54aa5a8..037e852 100644 (file)
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -39,9 +39,15 @@ void emitWebAssemblyDisassemblerTables(
              ->getValue());
      if (Opc == 0xFFFFFFFF)
        continue; // No opcode defined.
-    assert(Opc <= 0xFFFF);
-    auto Prefix = Opc >> 8;
-    Opc = Opc & 0xFF;
+    assert(Opc <= 0xFFFFFF);
+    unsigned Prefix;
+    if (Opc <= 0xFFFF) {
+      Prefix = Opc >> 8;
+      Opc = Opc & 0xFF;
+    } else {
+      Prefix = Opc >> 16;
+      Opc = Opc & 0xFFFF;
+    }
      auto &CGIP = OpcodeTable[Prefix][Opc];
      // All wasm instructions have a StackBased field of type string, we only
      // want the instructions for which this is "true".
author	Thomas Lively <tlively@google.com>
	Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)
committer	Thomas Lively <tlively@google.com>
	Wed, 28 Oct 2020 16:38:59 +0000 (09:38 -0700)
clang/include/clang/Basic/BuiltinsWebAssembly.def		patch \| blob \| history
clang/lib/CodeGen/CGBuiltin.cpp		patch \| blob \| history
clang/test/CodeGen/builtins-wasm.c		patch \| blob \| history
llvm/include/llvm/IR/IntrinsicsWebAssembly.td		patch \| blob \| history
llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp		patch \| blob \| history
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td		patch \| blob \| history
llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll		patch \| blob \| history
llvm/test/MC/WebAssembly/simd-encodings.s		patch \| blob \| history
llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp		patch \| blob \| history