util: add support for detecting avx512 vector bit size
author Yonggang Luo <luoyonggang@gmail.com>
Tue, 30 Aug 2022 06:57:26 +0000 (14:57 +0800)
committer Marge Bot <emma+marge@anholt.net>
Fri, 2 Sep 2022 01:51:42 +0000 (01:51 +0000)
Default to a 256-bit vector width until we're confident that llvmpipe
with 512-bit vectors is as correct as, and no slower than, with 256-bit vectors.

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17813>

src/gallium/auxiliary/gallivm/lp_bld_init.c
src/util/u_cpu_detect.c

index c5cb666..f77aac7 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "pipe/p_config.h"
 #include "pipe/p_compiler.h"
+#include "util/macros.h"
 #include "util/u_cpu_detect.h"
 #include "util/u_debug.h"
 #include "util/u_memory.h"
@@ -438,8 +439,11 @@ lp_build_init(void)
 
    lp_set_target_options();
 
+   // Default to 256 until we're confident llvmpipe with 512 is as correct and not slower than 256
+   lp_native_vector_width = MIN2(util_get_cpu_caps()->max_vector_bits, 256);
+
    lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
-                                                 util_get_cpu_caps()->max_vector_bits);
+                                                 lp_native_vector_width);
 
 #ifdef PIPE_ARCH_PPC_64
    /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
index c15ea11..dd799f2 100644 (file)
@@ -671,7 +671,9 @@ void check_max_vector_bits(void)
     */
    util_cpu_caps.max_vector_bits = 128;
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   if (util_cpu_caps.has_avx) {
+   if (util_cpu_caps.has_avx512f) {
+      util_cpu_caps.max_vector_bits = 512;
+   } else if (util_cpu_caps.has_avx) {
       util_cpu_caps.max_vector_bits = 256;
    }
 #endif