%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+.. _int_get_active_lane_mask:
+
+'``llvm.get.active.lane.mask.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %base, i32 %n)
+ declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %base, i64 %n)
+ declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %n)
+ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %base, i64 %n)
+
+
+Overview:
+"""""""""
+
+Create a mask representing active and inactive vector lanes.
+
+
+Arguments:
+""""""""""
+
+Both arguments have the same scalar integer type. The result is a vector with
+the ``i1`` element type.
+
+Semantics:
+""""""""""
+
+The '``llvm.get.active.lane.mask.*``' intrinsics are semantically equivalent
+to:
+
+::
+
+ %m[i] = icmp ule (%base + i), %n
+
+where ``%m`` is a vector (mask) of active/inactive lanes with its elements
+indexed by ``i``, ``%base`` and ``%n`` are the two arguments to
+``llvm.get.active.lane.mask.*``, ``icmp`` is an integer compare, and ``ule``
+is the unsigned less-than-or-equal comparison operator. Overflow cannot occur
+in ``(%base + i)`` or in its comparison against ``%n``, because both are
+performed over the mathematical integers rather than machine integers. The
+above is equivalent to:
+
+::
+
+ %m = @llvm.get.active.lane.mask(%base, %n)
+
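+For example, for a ``<4 x i1>`` result and the hypothetical argument values
+``%base = 6`` and ``%n = 8``, the lanes compare ``6, 7, 8, 9`` against ``8``
+using ``ule``, so the mask is:
+
+::
+
+ %m = <i1 true, i1 true, i1 true, i1 false>
+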
+This can, for example, be emitted by the loop vectorizer, in which case
+``%base`` is the first element of the vector induction variable (VIV) and
+``%n`` is the back-edge taken count (BTC). Thus, these intrinsics perform an
+element-wise less-than-or-equal comparison of VIV with BTC, producing a mask
+of true/false values representing active/inactive vector lanes, except if the
+VIV overflows, in which case they return false in the lanes where the VIV
+overflows. The arguments are scalar types to accommodate scalable vector
+types, for which it is unknown what the type of the step vector that
+enumerates the lanes needs to be in order to avoid overflow.
+
+This mask ``%m`` can, for example, be used in masked load/store instructions.
+The intrinsics also provide a hint to the backend: for a vector loop, the
+back-edge taken count of the original scalar loop is made explicit as the
+second argument.
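+
+As an illustration, the following is a minimal sketch (not taken from any
+actual pass output) of a tail-folded vector loop that increments ``%N``
+32-bit elements; the function name ``@inc`` is hypothetical, and the loop
+assumes ``%N > 0``:
+
+.. code-block:: llvm
+
+ define void @inc(i32* %ptr, i64 %N) {
+ entry:
+   %btc = sub i64 %N, 1     ; back-edge taken count of the scalar loop
+   br label %loop
+
+ loop:
+   %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+   ; Lane i is active when %index + i is at most %btc.
+   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %btc)
+   %gep = getelementptr i32, i32* %ptr, i64 %index
+   %addr = bitcast i32* %gep to <4 x i32>*
+   %vec = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> undef)
+   %add = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
+   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %add, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+   %index.next = add i64 %index, 4
+   %cont = icmp ult i64 %index.next, %N
+   br i1 %cont, label %loop, label %exit
+
+ exit:
+   ret void
+ }
+
+ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
+ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)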
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %elem0, i64 429)
+ %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
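+
+Here, assuming ``%elem0`` holds the first element of the vector induction
+variable and ``429`` is the back-edge taken count, lane ``i`` of the load is
+only performed when ``%elem0 + i ule 429``; the other lanes yield ``undef``.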
+
+
.. _int_mload_mstore:
Masked Vector Load and Store Intrinsics
"eh.exceptionpointer argument must be a catchpad", Call);
break;
}
+ case Intrinsic::get_active_lane_mask: {
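+    // The mask produced by this intrinsic must be a vector of i1 values,
+    // one element per lane.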
+ Assert(Call.getType()->isVectorTy(), "get_active_lane_mask: must return a "
+ "vector", Call);
+ auto *ElemTy = Call.getType()->getScalarType();
+ Assert(ElemTy->isIntegerTy(1), "get_active_lane_mask: element type is not "
+ "i1", Call);
+ break;
+ }
case Intrinsic::masked_load: {
Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector",
Call);
--- /dev/null
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
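+; Negative tests: the verifier must reject a result that is a vector with the
+; wrong element type (@t1) and a result that is not a vector at all (@t2).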
+
+declare <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32, i32)
+
+define <4 x i32> @t1(i32 %IV, i32 %BTC) {
+; CHECK: get_active_lane_mask: element type is not i1
+; CHECK-NEXT: %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+
+ %res = call <4 x i32> @llvm.get.active.lane.mask.v4i32.i32(i32 %IV, i32 %BTC)
+ ret <4 x i32> %res
+}
+
+declare i32 @llvm.get.active.lane.mask.i32.i32(i32, i32)
+
+define i32 @t2(i32 %IV, i32 %BTC) {
+; CHECK: Intrinsic has incorrect return type!
+; CHECK-NEXT: i32 (i32, i32)* @llvm.get.active.lane.mask.i32.i32
+
+ %res = call i32 @llvm.get.active.lane.mask.i32.i32(i32 %IV, i32 %BTC)
+ ret i32 %res
+}