%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+.. _int_get_active_lane_mask:
+
+'``llvm.get.active.lane.mask.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %base, i32 %n)
+ declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %base, i64 %n)
+ declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %n)
+ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %base, i64 %n)
+
+
+Overview:
+"""""""""
+
+Create a mask representing active and inactive vector lanes.
+
+
+Arguments:
+""""""""""
+
+Both arguments have the same scalar integer type. The result is a vector with
+the ``i1`` element type.
+
+Semantics:
+""""""""""
+
+The '``llvm.get.active.lane.mask.*``' intrinsics are semantically equivalent
+to:
+
+::
+
+ %m[i] = icmp ule (%base + i), %n
+
+where ``%m`` is a vector (mask) of active/inactive lanes with its elements
+indexed by ``i``, ``%base`` and ``%n`` are the two arguments to
+``llvm.get.active.lane.mask.*``, ``icmp`` is an integer compare, and ``ule``
+is the unsigned less-than-or-equal comparison operator. Overflow cannot occur
+in ``(%base + i)`` or in its comparison against ``%n``, because both are
+performed over the mathematical integers rather than machine integers. The
+above is equivalent to:
+
+::
+
+ %m = @llvm.get.active.lane.mask(%base, %n)
+
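+For example, for a ``<4 x i1>`` result and the hypothetical argument values
+``%base = 6`` and ``%n = 8``, the lanes compare ``6, 7, 8, 9`` against ``8``
+using ``ule``, so the mask is:
+
+::
+
+ %m = <i1 true, i1 true, i1 true, i1 false>
+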
+This can, for example, be emitted by the loop vectorizer, in which case
+``%base`` is the first element of the vector induction variable (VIV) and
+``%n`` is the back-edge taken count (BTC). Thus, these intrinsics perform an
+element-wise less-than-or-equal comparison of VIV with BTC, producing a mask
+of true/false values representing active/inactive vector lanes, except if the
+VIV overflows, in which case they return false in the lanes where the VIV
+overflows. The arguments are scalar types to accommodate scalable vector
+types, for which it is unknown what the type of the step vector that
+enumerates the lanes needs to be in order to avoid overflow.
+
+This mask ``%m`` can, for example, be used in masked load/store instructions.
+The intrinsics also provide a hint to the backend: for a vector loop, the
+back-edge taken count of the original scalar loop is made explicit as the
+second argument.
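+
+As an illustration, the following is a minimal sketch (not taken from any
+actual pass output) of a tail-folded vector loop that increments ``%N``
+32-bit elements; the function name ``@inc`` is hypothetical, and the loop
+assumes ``%N > 0``:
+
+.. code-block:: llvm
+
+ define void @inc(i32* %ptr, i64 %N) {
+ entry:
+   %btc = sub i64 %N, 1     ; back-edge taken count of the scalar loop
+   br label %loop
+
+ loop:
+   %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+   ; Lane i is active when %index + i is at most %btc.
+   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %btc)
+   %gep = getelementptr i32, i32* %ptr, i64 %index
+   %addr = bitcast i32* %gep to <4 x i32>*
+   %vec = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> undef)
+   %add = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
+   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %add, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+   %index.next = add i64 %index, 4
+   %cont = icmp ult i64 %index.next, %N
+   br i1 %cont, label %loop, label %exit
+
+ exit:
+   ret void
+ }
+
+ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)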
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %elem0, i64 429)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
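+
+Here, assuming ``%elem0`` holds the first element of the vector induction
+variable and ``429`` is the back-edge taken count, lane ``i`` of the load is
+only performed when ``%elem0 + i ule 429``; the other lanes yield ``undef``.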
+
+
.. _int_mload_mstore:
Masked Vector Load and Store Intrinsics
"eh.exceptionpointer argument must be a catchpad", Call);
break;
}
+ case Intrinsic::get_active_lane_mask: {
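+    // The mask produced by this intrinsic must be a vector of i1 values,
+    // one element per lane.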
+ Assert(Call.getType()->isVectorTy(), "get_active_lane_mask: must return a "
+ "vector", Call);
+ auto *ElemTy = Call.getType()->getScalarType();
+ Assert(ElemTy->isIntegerTy(1), "get_active_lane_mask: element type is not "
+ "i1", Call);
+ break;
+ }
case Intrinsic::masked_load: {
Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector",
Call);
--- /dev/null
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
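+; Negative tests: the verifier must reject a result that is a vector with the
+; wrong element type (@t1) and a result that is not a vector at all (@t2).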
+
+declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32)
+
+define <4 x i32> @t1(i32 %IV, i32 %BTC) {
+; CHECK: get_active_lane_mask: element type is not i1
+; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+
+ %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+ ret <4 x i32> %res
+}
+
+declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32)
+
+define i32 @t2(i32 %IV, i32 %BTC) {
+; CHECK: Intrinsic has incorrect return type!
+; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32
+
+ %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC)
+ ret i32 %res
+}