From d240fb150fcaff3fb62a0b8c2746bbe9f8301ae9 Mon Sep 17 00:00:00 2001 From: Yonggang Luo Date: Sat, 27 Aug 2022 17:11:35 +0800 Subject: [PATCH] util: Move the detection of max_vector_bits into u_cpu_detect.c has_avx2 implies has_avx, so there is no need to check them both Signed-off-by: Yonggang Luo Reviewed-by: Jose Fonseca Part-of: --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 11 +---------- src/util/u_cpu_detect.c | 17 +++++++++++++++++ src/util/u_cpu_detect.h | 1 + 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 4683126..c5cb666 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -438,17 +438,8 @@ lp_build_init(void) lp_set_target_options(); - if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) { - lp_native_vector_width = 256; - } else { - /* Leave it at 128, even when no SIMD extensions are available. - * Really needs to be a multiple of 128 so can fit 4 floats. - */ - lp_native_vector_width = 128; - } - lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", - lp_native_vector_width); + util_get_cpu_caps()->max_vector_bits); #ifdef PIPE_ARCH_PPC_64 /* Set the NJ bit in VSCR to 0 so denormalized values are handled as diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index 9282a73..c15ea11 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -663,6 +663,20 @@ void check_cpu_caps_override(void) #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ } +static +void check_max_vector_bits(void) +{ + /* Leave it at 128, even when no SIMD extensions are available. + * Really needs to be a multiple of 128 so can fit 4 floats. 
+ */ + util_cpu_caps.max_vector_bits = 128; +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if (util_cpu_caps.has_avx) { + util_cpu_caps.max_vector_bits = 256; + } +#endif +} + void _util_cpu_detect_once(void); void @@ -895,6 +909,9 @@ _util_cpu_detect_once(void) check_cpu_caps_override(); + /* max_vector_bits should be checked after cpu caps override */ + check_max_vector_bits(); + get_cpu_topology(); if (debug_get_option_dump_cpu()) { diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index 974e35d..6d1bf8c 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -120,6 +120,7 @@ struct util_cpu_caps_t { unsigned num_L3_caches; unsigned num_cpu_mask_bits; + unsigned max_vector_bits; uint16_t cpu_to_L3[UTIL_MAX_CPUS]; /* Affinity masks for each L3 cache. */ -- 2.7.4