This also happens to fix bptc-float-modes on llvmpipe.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6774>
spec/arb_sparse_buffer/minmax: skip
spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip
spec/arb_texture_buffer_object/negative-unsupported: skip
-spec/arb_texture_compression_bptc/bptc-float-modes: fail
spec/arb_texture_cube_map/copyteximage cube samples=16: skip
spec/arb_texture_cube_map/copyteximage cube samples=2: skip
spec/arb_texture_cube_map/copyteximage cube samples=32: skip
summary:
name: results
---- --------
- pass: 23074
- fail: 198
+ pass: 23075
+ fail: 197
crash: 0
skip: 1433
timeout: 0
sse41_args = []
endif
+if cc.has_argument('-mf16c') and cpp.has_argument('-mf16c')
+ pre_args += '-DUSE_F16C'
+ c_args += '-mf16c'
+ cpp_args += '-mf16c'
+
+ # GCC on x86 (not x86_64) with -msse* assumes a 16 byte aligned stack, but
+ # that's not guaranteed (not sure if this also applies to -mf16c)
+ if host_machine.cpu_family() == 'x86'
+ c_args += '-mstackrealign'
+ cpp_args += '-mstackrealign'
+ endif
+endif
+
# Check for GCC style atomics
dep_atomic = null_dep
#include "util/u_math.h"
#include "util/u_half.h"
+#include "util/u_cpu_detect.h"
-int
-main(int argc, char **argv)
+static void
+test(void)
{
unsigned i;
unsigned roundtrip_fails = 0;
if(roundtrip_fails) {
printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
- return 1;
- } else {
- printf("Success!\n");
- return 0;
+ exit(1);
}
}
+
+int
+main(int argc, char **argv)
+{
+ assert(!util_cpu_caps.has_f16c);
+ test();
+
+ /* Test f16c. */
+ util_cpu_detect();
+ if (util_cpu_caps.has_f16c)
+ test();
+
+ printf("Success!\n");
+ return 0;
+}
* result in the same value as if the expression were executed on the GPU.
*/
uint16_t
-_mesa_float_to_half(float val)
+_mesa_float_to_half_slow(float val)
{
const fi_type fi = {val};
const int flt_m = fi.i & 0x7fffff;
}
uint16_t
-_mesa_float_to_float16_rtz(float val)
+_mesa_float_to_float16_rtz_slow(float val)
{
- return _mesa_float_to_half_rtz(val);
+ return _mesa_float_to_half_rtz_slow(val);
}
/**
* http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
*/
float
-_mesa_half_to_float(uint16_t val)
+_mesa_half_to_float_slow(uint16_t val)
{
return util_half_to_float(val);
}
#include <stdbool.h>
#include <stdint.h>
+#include <string.h>
+#include "util/u_cpu_detect.h"
+
+#ifdef USE_F16C
+#include <immintrin.h>
+#define F16C_NEAREST 0
+#define F16C_TRUNCATE 3
+#endif
#ifdef __cplusplus
extern "C" {
#define FP16_ONE ((uint16_t) 0x3c00)
#define FP16_ZERO ((uint16_t) 0)
-uint16_t _mesa_float_to_half(float val);
-float _mesa_half_to_float(uint16_t val);
+uint16_t _mesa_float_to_half_slow(float val);
+float _mesa_half_to_float_slow(uint16_t val);
uint8_t _mesa_half_to_unorm8(uint16_t v);
uint16_t _mesa_uint16_div_64k_to_half(uint16_t v);
/*
- * _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart
+ * _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart
* softfloat.h call. Still, softfloat.h conversion API is meant to be kept
* private. In other words, only use the API published here, instead of
* calling directly the softfloat.h one.
*/
-uint16_t _mesa_float_to_float16_rtz(float val);
+uint16_t _mesa_float_to_float16_rtz_slow(float val);
+
+static inline uint16_t
+_mesa_float_to_half(float val)
+{
+#ifdef USE_F16C
+ if (util_cpu_caps.has_f16c)
+ return _cvtss_sh(val, F16C_NEAREST);
+#endif
+ return _mesa_float_to_half_slow(val);
+}
+
+static inline float
+_mesa_half_to_float(uint16_t val)
+{
+#ifdef USE_F16C
+ if (util_cpu_caps.has_f16c)
+ return _cvtsh_ss(val);
+#endif
+ return _mesa_half_to_float_slow(val);
+}
+
+static inline uint16_t
+_mesa_float_to_float16_rtz(float val)
+{
+#ifdef USE_F16C
+ if (util_cpu_caps.has_f16c)
+ return _cvtss_sh(val, F16C_TRUNCATE);
+#endif
+ return _mesa_float_to_float16_rtz_slow(val);
+}
static inline uint16_t
_mesa_float_to_float16_rtne(float val)
* From f32_to_f16()
*/
uint16_t
-_mesa_float_to_half_rtz(float val)
+_mesa_float_to_half_rtz_slow(float val)
{
const fi_type fi = {val};
const uint32_t flt_m = fi.u & 0x7fffff;
double _mesa_double_fma_rtz(double a, double b, double c);
float _mesa_float_fma_rtz(float a, float b, float c);
float _mesa_double_to_f32(double x, bool rtz);
-uint16_t _mesa_float_to_half_rtz(float x);
+uint16_t _mesa_float_to_half_rtz_slow(float x);
#ifdef __cplusplus
} /* extern C */