util: Move the detection of max_vector_bits into u_cpu_detect.c
author Yonggang Luo <luoyonggang@gmail.com>
Sat, 27 Aug 2022 09:11:35 +0000 (17:11 +0800)
committer Marge Bot <emma+marge@anholt.net>
Fri, 2 Sep 2022 01:51:42 +0000 (01:51 +0000)
has_avx2 implies has_avx, so there is no need to check them both.

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17813>

src/gallium/auxiliary/gallivm/lp_bld_init.c
src/util/u_cpu_detect.c
src/util/u_cpu_detect.h

index 4683126..c5cb666 100644 (file)
@@ -438,17 +438,8 @@ lp_build_init(void)
 
    lp_set_target_options();
 
-   if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) {
-      lp_native_vector_width = 256;
-   } else {
-      /* Leave it at 128, even when no SIMD extensions are available.
-       * Really needs to be a multiple of 128 so can fit 4 floats.
-       */
-      lp_native_vector_width = 128;
-   }
-
    lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
-                                                 lp_native_vector_width);
+                                                 util_get_cpu_caps()->max_vector_bits);
 
 #ifdef PIPE_ARCH_PPC_64
    /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
index 9282a73..c15ea11 100644 (file)
@@ -663,6 +663,20 @@ void check_cpu_caps_override(void)
 #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
 }
 
+static
+void check_max_vector_bits(void)
+{
+   /* Default to 128 bits even with no SIMD extensions: the width must be a
+    * multiple of 128 to fit 4 floats. Raised to 256 below on x86 with AVX.
+    */
+   util_cpu_caps.max_vector_bits = 128;
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   if (util_cpu_caps.has_avx) {
+      util_cpu_caps.max_vector_bits = 256;
+   }
+#endif
+}
+
 void _util_cpu_detect_once(void);
 
 void
@@ -895,6 +909,9 @@ _util_cpu_detect_once(void)
 
    check_cpu_caps_override();
 
+   /* max_vector_bits should be checked after cpu caps override */
+   check_max_vector_bits();
+
    get_cpu_topology();
 
    if (debug_get_option_dump_cpu()) {
index 974e35d..6d1bf8c 100644 (file)
@@ -120,6 +120,7 @@ struct util_cpu_caps_t {
 
    unsigned num_L3_caches;
    unsigned num_cpu_mask_bits;
+   unsigned max_vector_bits;
 
    uint16_t cpu_to_L3[UTIL_MAX_CPUS];
    /* Affinity masks for each L3 cache. */