[WebAssembly] Add prototype relaxed SIMD fma/fms instructions

author Thomas Lively <tlively@google.com>

Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)

committer Thomas Lively <tlively@google.com>

Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)
author Thomas Lively <tlively@google.com>
Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)
committer Thomas Lively <tlively@google.com>
Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def

index f5120b23f81187cfe4dffecfd4e71a0ebe8451ec..778424686a10bd0ebaa10a465906b8cfabd6aab5 100644 (file)
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -161,5 +161,11 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
  TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
  TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
  
+// Relaxed SIMD builtins (experimental)
+TARGET_BUILTIN(__builtin_wasm_fma_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fms_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fma_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fms_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
+
  #undef BUILTIN
  #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp

index d485e8e76769213e28234e2c7913398d4e18b6d7..2a9ab387fa5275b5bc3cdc4eeba80f2be894ff45 100644 (file)
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18222,6 +18222,29 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
      Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
      return Builder.CreateCall(Callee, Ops);
    }
+  case WebAssembly::BI__builtin_wasm_fma_f32x4:
+  case WebAssembly::BI__builtin_wasm_fms_f32x4:
+  case WebAssembly::BI__builtin_wasm_fma_f64x2:
+  case WebAssembly::BI__builtin_wasm_fms_f64x2: {
+    Value *A = EmitScalarExpr(E->getArg(0));
+    Value *B = EmitScalarExpr(E->getArg(1));
+    Value *C = EmitScalarExpr(E->getArg(2));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_fma_f32x4:
+    case WebAssembly::BI__builtin_wasm_fma_f64x2:
+      IntNo = Intrinsic::wasm_fma;
+      break;
+    case WebAssembly::BI__builtin_wasm_fms_f32x4:
+    case WebAssembly::BI__builtin_wasm_fms_f64x2:
+      IntNo = Intrinsic::wasm_fms;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+    return Builder.CreateCall(Callee, {A, B, C});
+  }
    default:
      return nullptr;
    }
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c

index 7f67d78693d0cf15cef785080f0b63a2c38b1da3..914c37dc1e1ef498f5b68ba3d6944b8de8c6a87f 100644 (file)
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
-// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
  // RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
  
  // SIMD convenience types
@@ -676,3 +676,31 @@ i8x16 shuffle(i8x16 x, i8x16 y) {
    // WEBASSEMBLY-SAME: i32 15
    // WEBASSEMBLY-NEXT: ret
  }
+
+f32x4 fma_f32x4(f32x4 a, f32x4 b, f32x4 c) {
+  return __builtin_wasm_fma_f32x4(a, b, c);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.fma.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f32x4 fms_f32x4(f32x4 a, f32x4 b, f32x4 c) {
+  return __builtin_wasm_fms_f32x4(a, b, c);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.fms.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 fma_f64x2(f64x2 a, f64x2 b, f64x2 c) {
+  return __builtin_wasm_fma_f64x2(a, b, c);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.fma.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 fms_f64x2(f64x2 a, f64x2 b, f64x2 c) {
+  return __builtin_wasm_fms_f64x2(a, b, c);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.fms.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
+  // WEBASSEMBLY-NEXT: ret
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td

index 158744a609a1486d87ddce23887b32ce76ac0490..de0b36eadecfa02e251297976a5fe8b246a0e4f7 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -182,6 +182,19 @@ def int_wasm_extadd_pairwise_unsigned :
              [LLVMSubdivide2VectorType<0>],
              [IntrNoMem, IntrSpeculatable]>;
  
+//===----------------------------------------------------------------------===//
+// Relaxed SIMD intrinsics (experimental)
+//===----------------------------------------------------------------------===//
+
+def int_wasm_fma :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_fms :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+
  //===----------------------------------------------------------------------===//
  // Thread-local storage intrinsics
  //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td

index c61a9cd0b967f2705209fce4c4e14005ef6bb74e..55ebc220f4094e1e6e2fd1c609989e8005d64b1a 100644 (file)
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -26,6 +26,10 @@ def HasSIMD128 :
      Predicate<"Subtarget->hasSIMD128()">,
      AssemblerPredicate<(all_of FeatureSIMD128), "simd128">;
  
+def HasRelaxedSIMD :
+    Predicate<"Subtarget->hasRelaxedSIMD()">,
+    AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">;
+
  def HasAtomics :
      Predicate<"Subtarget->hasAtomics()">,
      AssemblerPredicate<(all_of FeatureAtomics), "atomics">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

index 551176fdd2336f3c10e02ac19d963b0994a4540c..4448faad295d31b2e2bf7c64630a6fff9f9c38e0 100644 (file)
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -11,17 +11,34 @@
  ///
  //===----------------------------------------------------------------------===//
  
-// Instructions requiring HasSIMD128 and the simd128 prefix byte
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
-                  list<dag> pattern_r, string asmstr_r = "",
-                  string asmstr_s = "", bits<32> simdop = -1> {
+// Instructions using the SIMD opcode prefix and requiring one of the SIMD
+// feature predicates.
+multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                           list<dag> pattern_r, string asmstr_r,
+                           string asmstr_s, bits<32> simdop,
+                           Predicate simd_level> {
    defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
                !if(!ge(simdop, 0x100),
                    !or(0xfd0000, !and(0xffff, simdop)),
                    !or(0xfd00, !and(0xff, simdop)))>,
-            Requires<[HasSIMD128]>;
+            Requires<[simd_level]>;
  }
  
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                  list<dag> pattern_r, string asmstr_r = "",
+                  string asmstr_s = "", bits<32> simdop = -1> {
+  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+                            asmstr_s, simdop, HasSIMD128>;
+}
+
+multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                     list<dag> pattern_r, string asmstr_r = "",
+                     string asmstr_s = "", bits<32> simdop = -1> {
+  defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+                            asmstr_s, simdop, HasRelaxedSIMD>;
+}
+
+
  defm "" : ARGUMENT<V128, v16i8>;
  defm "" : ARGUMENT<V128, v8i16>;
  defm "" : ARGUMENT<V128, v4i32>;
@@ -1307,3 +1324,23 @@ def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
  
  defm Q15MULR_SAT_S :
    SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
+
+//===----------------------------------------------------------------------===//
+// Fused Multiply-Add and Multiply-Subtract (FMA/FMS)
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
+  defm FMA_#vec :
+    RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+              [(set (vec.vt V128:$dst), (int_wasm_fma
+                (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+              vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>;
+  defm FMS_#vec :
+    RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+              [(set (vec.vt V128:$dst), (int_wasm_fms
+                (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+              vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>;
+}
+
+defm "" : SIMDFM<F32x4, 0xaf, 0xb0>;
+defm "" : SIMDFM<F64x2, 0xcf, 0xd0>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

index 420422cf9cbb483e3a1b3cf512587d439c10914b..31f9afc2360bf7f006a60db218d7f6356cef3f9b 100644 (file)
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SLOW
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+relaxed-simd | FileCheck %s --check-prefixes=CHECK,SLOW
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+relaxed-simd -fast-isel | FileCheck %s
  
  ; Test that SIMD128 intrinsics lower as expected. These intrinsics are
  ; only expected to lower successfully if the simd128 attribute is
@@ -600,6 +600,30 @@ define <4 x float> @nearest_v4f32(<4 x float> %a) {
    ret <4 x float> %v
  }
  
+; CHECK-LABEL: fma_v4f32:
+; CHECK-NEXT: .functype fma_v4f32 (v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+  %v = call <4 x float> @llvm.wasm.fma.v4f32(
+    <4 x float> %a, <4 x float> %b, <4 x float> %c
+  )
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: fms_v4f32:
+; CHECK-NEXT: .functype fms_v4f32 (v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.fms.v4f32(<4 x float>, <4 x float>, <4 x float>)
+define <4 x float> @fms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+  %v = call <4 x float> @llvm.wasm.fms.v4f32(
+    <4 x float> %a, <4 x float> %b, <4 x float> %c
+  )
+  ret <4 x float> %v
+}
+
  ; ==============================================================================
  ; 2 x f64
  ; ==============================================================================
@@ -674,3 +698,27 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
    %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
    ret <2 x double> %v
  }
+
+; CHECK-LABEL: fma_v2f64:
+; CHECK-NEXT: .functype fma_v2f64 (v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.fma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+  %v = call <2 x double> @llvm.wasm.fma.v2f64(
+    <2 x double> %a, <2 x double> %b, <2 x double> %c
+  )
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: fms_v2f64:
+; CHECK-NEXT: .functype fms_v2f64 (v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.fms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.fms.v2f64(<2 x double>, <2 x double>, <2 x double>)
+define <2 x double> @fms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+  %v = call <2 x double> @llvm.wasm.fms.v2f64(
+    <2 x double> %a, <2 x double> %b, <2 x double> %c
+  )
+  ret <2 x double> %v
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s

index ab405406a3d119b2f217183b99b109664169e9b3..fa24bf9a55510d75abe65f0e5a83a069118198f7 100644 (file)
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128 < %s | FileCheck %s
+# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd < %s | FileCheck %s
  
  main:
      .functype main () -> ()
@@ -779,4 +779,16 @@ main:
      # CHECK: f64x2.convert_low_i32x4_u # encoding: [0xfd,0xff,0x01]
      f64x2.convert_low_i32x4_u
  
+    # CHECK: f32x4.fma # encoding: [0xfd,0xaf,0x01]
+    f32x4.fma
+
+    # CHECK: f32x4.fms # encoding: [0xfd,0xb0,0x01]
+    f32x4.fms
+
+    # CHECK: f64x2.fma # encoding: [0xfd,0xcf,0x01]
+    f64x2.fma
+
+    # CHECK: f64x2.fms # encoding: [0xfd,0xd0,0x01]
+    f64x2.fms
+
      end_function
author	Thomas Lively <tlively@google.com>
	Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)
committer	Thomas Lively <tlively@google.com>
	Thu, 23 Sep 2021 18:01:36 +0000 (11:01 -0700)
clang/include/clang/Basic/BuiltinsWebAssembly.def		patch | blob | history
clang/lib/CodeGen/CGBuiltin.cpp		patch | blob | history
clang/test/CodeGen/builtins-wasm.c		patch | blob | history
llvm/include/llvm/IR/IntrinsicsWebAssembly.td		patch | blob | history
llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td		patch | blob | history
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td		patch | blob | history
llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll		patch | blob | history
llvm/test/MC/WebAssembly/simd-encodings.s		patch | blob | history