gallivm: use llvm intrinsics for 16-bit round/trunc/roundeven
author Dave Airlie <airlied@redhat.com>
Tue, 7 Sep 2021 01:17:39 +0000 (11:17 +1000)
committer Marge Bot <eric+marge@anholt.net>
Thu, 16 Sep 2021 04:15:41 +0000 (04:15 +0000)
Without the LLVM intrinsics, the inf translations for 16-bit
round/trunc/roundeven don't seem to work, and the VK CTS fails.

Fixes VK CTS dEQP-VK.spirv_assembly.instruction.graphics.float16.arithmetic*

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11816>

src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_nir.c

index 3b4430e..acad6f5 100644 (file)
@@ -2030,6 +2030,12 @@ lp_build_trunc(struct lp_build_context *bld,
    assert(type.floating);
    assert(lp_check_value(type, a));
 
+   if (type.width == 16) {
+      char intrinsic[64];
+      lp_format_intrinsic(intrinsic, 64, "llvm.trunc", bld->vec_type);
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
+
    if (arch_rounding_available(type)) {
       return lp_build_round_arch(bld, a, LP_BUILD_ROUND_TRUNCATE);
    }
@@ -2083,6 +2089,12 @@ lp_build_round(struct lp_build_context *bld,
    assert(type.floating);
    assert(lp_check_value(type, a));
 
+   if (type.width == 16) {
+      char intrinsic[64];
+      lp_format_intrinsic(intrinsic, 64, "llvm.round", bld->vec_type);
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
+
    if (arch_rounding_available(type)) {
       return lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST);
    }
index a07603d..ca56330 100644 (file)
@@ -32,6 +32,7 @@
 #include "lp_bld_logic.h"
 #include "lp_bld_quad.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_intr.h"
 #include "lp_bld_struct.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_printf.h"
@@ -798,7 +799,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fround_even:
-      result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
+      if (src_bit_size[0] == 16) {
+        struct lp_build_context *bld = get_flt_bld(bld_base, 16);
+        char intrinsic[64];
+        lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
+        result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
+      } else
+        result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_frsq:
       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);