[VP] llvm.vp.fma intrinsic and LangRef

author Craig Topper <craig.topper@sifive.com>

Mon, 7 Feb 2022 21:24:36 +0000 (13:24 -0800)

committer Craig Topper <craig.topper@sifive.com>

Mon, 7 Feb 2022 23:53:27 +0000 (15:53 -0800)
author Craig Topper <craig.topper@sifive.com>
Mon, 7 Feb 2022 21:24:36 +0000 (13:24 -0800)
committer Craig Topper <craig.topper@sifive.com>
Mon, 7 Feb 2022 23:53:27 +0000 (15:53 -0800)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst

index 41e4714..fab605f 100644 (file)
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18812,6 +18812,54 @@ Examples:
        %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
  
  
+.. _int_vp_fma:
+
+'``llvm.vp.fma.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x float>  @llvm.vp.fma.v16f32 (<16 x float> <left_op>, <16 x float> <middle_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x float>  @llvm.vp.fma.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <middle_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x double>  @llvm.vp.fma.v256f64 (<256 x double> <left_op>, <256 x double> <middle_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated floating-point fused multiply-add of three vectors of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The first three operands and the result have the same floating-point vector type. The
+fourth operand is the vector mask and has the same number of elements as the
+result vector type. The fifth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.fma``' intrinsic performs floating-point fused multiply-add (:ref:`llvm.fma <int_fma>`)
+of the first, second, and third vector operands on each enabled lane.  The result on
+disabled lanes is undefined.  The operation is performed in the default
+floating-point environment.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+      %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
  
  .. _int_vp_reduce_add:
  
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td

index f5248e8..2a5d8dc 100644 (file)
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1504,6 +1504,13 @@ let IntrProperties =
                                  LLVMMatchType<0>,
                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                  llvm_i32_ty]>;
+
+  def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                              [ LLVMMatchType<0>,
+                                LLVMMatchType<0>,
+                                LLVMMatchType<0>,
+                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                llvm_i32_ty]>;
  }
  // Shuffles.
  def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def

index 1abcbb8..c57486e 100644 (file)
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -211,6 +211,11 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
  
  #undef HELPER_REGISTER_BINARY_FP_VP
  
+// llvm.vp.fma(x,y,z,mask,vlen)
+BEGIN_REGISTER_VP(vp_fma, 3, 4, VP_FMA, -1)
+VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fma)
+END_REGISTER_VP(vp_fma, VP_FMA)
+
  ///// } Floating-Point Arithmetic
  
  ///// Memory Operations {
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp

index 9adcb21..440b121 100644 (file)
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -51,6 +51,9 @@ protected:
        Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode
            << ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) ";
  
+    Str << " declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, "
+                 "<8 x float>, <8 x i1>, i32) ";
+
      Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, "
             "<8 x i1>, i32) ";
      Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x "
author	Craig Topper <craig.topper@sifive.com>
	Mon, 7 Feb 2022 21:24:36 +0000 (13:24 -0800)
committer	Craig Topper <craig.topper@sifive.com>
	Mon, 7 Feb 2022 23:53:27 +0000 (15:53 -0800)
llvm/docs/LangRef.rst		patch \| blob \| history
llvm/include/llvm/IR/Intrinsics.td		patch \| blob \| history
llvm/include/llvm/IR/VPIntrinsics.def		patch \| blob \| history
llvm/unittests/IR/VPIntrinsicTest.cpp		patch \| blob \| history