From ea2cdbf5e655ce157c2224f7d427229be55551cc Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 2 Sep 2021 09:53:53 +0200 Subject: [PATCH] [VP] Declaration and docs for vp.select intrinsic llvm.vp.select extends the regular select instruction with an explicit vector length (%evl). All lanes with indexes at and above %evl are undefined. Lanes below %evl are taken from the first input where the mask is true and from the second input otherwise. Reviewed By: rogfer01 Differential Revision: https://reviews.llvm.org/D105351 --- llvm/docs/LangRef.rst | 58 +++++++++++++++++++++++++++++++++++ llvm/include/llvm/IR/Intrinsics.td | 6 ++++ llvm/include/llvm/IR/VPIntrinsics.def | 10 ++++++ llvm/lib/IR/IntrinsicInst.cpp | 3 ++ llvm/unittests/IR/VPIntrinsicTest.cpp | 2 ++ 5 files changed, 79 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1fe8f9b..fb11821 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17745,6 +17745,64 @@ The use of an effective %evl is discouraged for those targets. The function ``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target has native support for %evl. +.. _int_vp_select: + +'``llvm.vp.select.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>) + declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <condition>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <evl>) + +Overview: +""""""""" + +The '``llvm.vp.select``' intrinsic is used to choose one value based on a +condition vector, without IR-level branching. + +Arguments: +"""""""""" + +The first operand is a vector of ``i1`` and indicates the condition. The +second operand is the value that is selected where the condition vector is +true. The third operand is the value that is selected where the condition +vector is false. The vectors must be of the same size. The fourth operand is +the explicit vector length. + +#. 
The optional ``fast-math flags`` marker indicates that the select has one or + more :ref:`fast-math flags <fastmath>`. These are optimization hints to + enable otherwise unsafe floating-point optimizations. Fast-math flags are + only valid for selects that return a floating-point scalar or vector type, + or an array (nested to any depth) of floating-point scalar or vector types. + +Semantics: +"""""""""" + +The intrinsic selects lanes from the second and third operand depending on a +condition vector. + +All result lanes at positions greater than or equal to ``%evl`` are undefined. +For all lanes below ``%evl`` where the condition vector is true the lane is +taken from the second operand. Otherwise, the lane is taken from the third +operand. + +Example: +"""""""" + +.. code-block:: llvm + + %r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false, i32 %evl) + + ;;; Expansion. + ;; Any result is legal on lanes at and above %evl. + %also.r = select <4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false + + .. _int_vp_add: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 9b12aea..5f35565 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1507,6 +1507,12 @@ let IntrProperties = LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; } +// Shuffles. 
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; // Reductions let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 95de839..026fa3c 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -333,6 +333,16 @@ HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL, ///// } Reduction +///// Shuffles { + +// llvm.vp.select(mask,on_true,on_false,vlen) +BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3) +// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4) +// END_REGISTER_CASES(vp_select, VP_SELECT) +END_REGISTER_VP_INTRINSIC(vp_select) + +///// } Shuffles + #undef BEGIN_REGISTER_VP #undef BEGIN_REGISTER_VP_INTRINSIC #undef BEGIN_REGISTER_VP_SDNODE diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 7a7ff91..56dfedc 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -482,6 +482,9 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy); break; } + case Intrinsic::vp_select: + VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()}); + break; case Intrinsic::vp_load: VPFunc = Intrinsic::getDeclaration( M, VPID, diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index 223e3ad..32a0726b 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -68,6 +68,8 @@ protected: Str << " declare float @llvm.vp.reduce." << ReductionOpcode << ".v8f32(float, <8 x float>, <8 x i1>, i32) "; + Str << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x " + "i32>, i32)"; return parseAssemblyString(Str.str(), Err, C); } }; -- 2.7.4