util: implement f16c - fast half<->float conversions

author Marek Olšák <marek.olsak@amd.com>

Fri, 18 Sep 2020 09:21:09 +0000 (05:21 -0400)

committer Marge Bot <eric+marge@anholt.net>

Wed, 30 Sep 2020 16:28:24 +0000 (16:28 +0000)
author Marek Olšák <marek.olsak@amd.com>
Fri, 18 Sep 2020 09:21:09 +0000 (05:21 -0400)
committer Marge Bot <eric+marge@anholt.net>
Wed, 30 Sep 2020 16:28:24 +0000 (16:28 +0000)
diff --git a/.gitlab-ci/piglit/quick_gl.txt b/.gitlab-ci/piglit/quick_gl.txt

index a24ae0d..104d2f6 100644 (file)
--- a/.gitlab-ci/piglit/quick_gl.txt
+++ b/.gitlab-ci/piglit/quick_gl.txt
@@ -738,7 +738,6 @@ spec/arb_sparse_buffer/commit: skip
  spec/arb_sparse_buffer/minmax: skip
  spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip
  spec/arb_texture_buffer_object/negative-unsupported: skip
-spec/arb_texture_compression_bptc/bptc-float-modes: fail
  spec/arb_texture_cube_map/copyteximage cube samples=16: skip
  spec/arb_texture_cube_map/copyteximage cube samples=2: skip
  spec/arb_texture_cube_map/copyteximage cube samples=32: skip
@@ -1656,8 +1655,8 @@ wgl/wgl-sanity: skip
  summary:
         name:  results
         ----  --------
-       pass:    23074
-       fail:      198
+       pass:    23075
+       fail:      197
        crash:        0
         skip:     1433
      timeout:        0
diff --git a/meson.build b/meson.build

index 76f72a9..b855710 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -1111,6 +1111,19 @@ else
    sse41_args = []
  endif
  
+if cc.has_argument('-mf16c') and cpp.has_argument('-mf16c')
+  pre_args += '-DUSE_F16C'
+  c_args += '-mf16c'
+  cpp_args += '-mf16c'
+
+  # GCC on x86 (not x86_64) with -msse* assumes a 16 byte aligned stack, but
+  # that's not guaranteed (not sure if this also applies to -mf16c)
+  if host_machine.cpu_family() == 'x86'
+    c_args += '-mstackrealign'
+    cpp_args += '-mstackrealign'
+  endif
+endif
+
  # Check for GCC style atomics
  dep_atomic = null_dep
  
diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c

index 48a9a2d..fb4ce6e 100644 (file)
--- a/src/gallium/tests/unit/u_half_test.c
+++ b/src/gallium/tests/unit/u_half_test.c
@@ -4,9 +4,10 @@
  
  #include "util/u_math.h"
  #include "util/u_half.h"
+#include "util/u_cpu_detect.h"
  
-int
-main(int argc, char **argv)
+static void
+test(void)
  {
     unsigned i;
     unsigned roundtrip_fails = 0;
@@ -28,9 +29,21 @@ main(int argc, char **argv)
  
     if(roundtrip_fails) {
        printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
-      return 1;
-   } else {
-      printf("Success!\n");
-      return 0;
+      exit(1);
     }
  }
+
+int
+main(int argc, char **argv)
+{
+   assert(!util_cpu_caps.has_f16c);
+   test();
+
+   /* Test f16c. */
+   util_cpu_detect();
+   if (util_cpu_caps.has_f16c)
+      test();
+
+   printf("Success!\n");
+   return 0;
+}
diff --git a/src/util/half_float.c b/src/util/half_float.c

index aae690a..61b512f 100644 (file)
--- a/src/util/half_float.c
+++ b/src/util/half_float.c
@@ -54,7 +54,7 @@ typedef union { float f; int32_t i; uint32_t u; } fi_type;
   *     result in the same value as if the expression were executed on the GPU.
   */
  uint16_t
-_mesa_float_to_half(float val)
+_mesa_float_to_half_slow(float val)
  {
     const fi_type fi = {val};
     const int flt_m = fi.i & 0x7fffff;
@@ -129,9 +129,9 @@ _mesa_float_to_half(float val)
  }
  
  uint16_t
-_mesa_float_to_float16_rtz(float val)
+_mesa_float_to_float16_rtz_slow(float val)
  {
-    return _mesa_float_to_half_rtz(val);
+    return _mesa_float_to_half_rtz_slow(val);
  }
  
  /**
@@ -140,7 +140,7 @@ _mesa_float_to_float16_rtz(float val)
   * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
   */
  float
-_mesa_half_to_float(uint16_t val)
+_mesa_half_to_float_slow(uint16_t val)
  {
     return util_half_to_float(val);
  }
diff --git a/src/util/half_float.h b/src/util/half_float.h

index c9fad9a..6f9a405 100644 (file)
--- a/src/util/half_float.h
+++ b/src/util/half_float.h
@@ -28,6 +28,14 @@
  
  #include <stdbool.h>
  #include <stdint.h>
+#include <string.h>
+#include "util/u_cpu_detect.h"
+
+#ifdef USE_F16C
+#include <immintrin.h>
+#define F16C_NEAREST 0
+#define F16C_TRUNCATE 3
+#endif
  
  #ifdef __cplusplus
  extern "C" {
@@ -36,18 +44,48 @@ extern "C" {
  #define FP16_ONE     ((uint16_t) 0x3c00)
  #define FP16_ZERO    ((uint16_t) 0)
  
-uint16_t _mesa_float_to_half(float val);
-float _mesa_half_to_float(uint16_t val);
+uint16_t _mesa_float_to_half_slow(float val);
+float _mesa_half_to_float_slow(uint16_t val);
  uint8_t _mesa_half_to_unorm8(uint16_t v);
  uint16_t _mesa_uint16_div_64k_to_half(uint16_t v);
  
  /*
- * _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart
+ * _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart
   * softfloat.h call. Still, softfloat.h conversion API is meant to be kept
   * private. In other words, only use the API published here, instead of
   * calling directly the softfloat.h one.
   */
-uint16_t _mesa_float_to_float16_rtz(float val);
+uint16_t _mesa_float_to_float16_rtz_slow(float val);
+
+static inline uint16_t
+_mesa_float_to_half(float val)
+{
+#ifdef USE_F16C
+   if (util_cpu_caps.has_f16c)
+      return _cvtss_sh(val, F16C_NEAREST);
+#endif
+   return _mesa_float_to_half_slow(val);
+}
+
+static inline float
+_mesa_half_to_float(uint16_t val)
+{
+#ifdef USE_F16C
+   if (util_cpu_caps.has_f16c)
+      return _cvtsh_ss(val);
+#endif
+   return _mesa_half_to_float_slow(val);
+}
+
+static inline uint16_t
+_mesa_float_to_float16_rtz(float val)
+{
+#ifdef USE_F16C
+   if (util_cpu_caps.has_f16c)
+      return _cvtss_sh(val, F16C_TRUNCATE);
+#endif
+   return _mesa_float_to_float16_rtz_slow(val);
+}
  
  static inline uint16_t
  _mesa_float_to_float16_rtne(float val)
diff --git a/src/util/softfloat.c b/src/util/softfloat.c

index 365b15b..50cf098 100644 (file)
--- a/src/util/softfloat.c
+++ b/src/util/softfloat.c
@@ -1435,7 +1435,7 @@ _mesa_double_to_f32(double val, bool rtz)
   * From f32_to_f16()
   */
  uint16_t
-_mesa_float_to_half_rtz(float val)
+_mesa_float_to_half_rtz_slow(float val)
  {
      const fi_type fi = {val};
      const uint32_t flt_m = fi.u & 0x7fffff;
diff --git a/src/util/softfloat.h b/src/util/softfloat.h

index 4e48c65..2e254e2 100644 (file)
--- a/src/util/softfloat.h
+++ b/src/util/softfloat.h
@@ -56,7 +56,7 @@ double _mesa_double_mul_rtz(double a, double b);
  double _mesa_double_fma_rtz(double a, double b, double c);
  float _mesa_float_fma_rtz(float a, float b, float c);
  float _mesa_double_to_f32(double x, bool rtz);
-uint16_t _mesa_float_to_half_rtz(float x);
+uint16_t _mesa_float_to_half_rtz_slow(float x);
  
  #ifdef __cplusplus
  } /* extern C */
author	Marek Olšák <marek.olsak@amd.com>
	Fri, 18 Sep 2020 09:21:09 +0000 (05:21 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Wed, 30 Sep 2020 16:28:24 +0000 (16:28 +0000)
.gitlab-ci/piglit/quick_gl.txt		patch \| blob \| history
meson.build		patch \| blob \| history
src/gallium/tests/unit/u_half_test.c		patch \| blob \| history
src/util/half_float.c		patch \| blob \| history
src/util/half_float.h		patch \| blob \| history
src/util/softfloat.c		patch \| blob \| history
src/util/softfloat.h		patch \| blob \| history