lp_set_target_options();
- if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) {
- lp_native_vector_width = 256;
- } else {
- /* Leave it at 128, even when no SIMD extensions are available.
- * Really needs to be a multiple of 128 so can fit 4 floats.
- */
- lp_native_vector_width = 128;
- }
-
lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
- lp_native_vector_width);
+ util_get_cpu_caps()->max_vector_bits);
#ifdef PIPE_ARCH_PPC_64
/* Set the NJ bit in VSCR to 0 so denormalized values are handled as
#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
}
+static
+void check_max_vector_bits(void)
+{
+ /* Record the maximum SIMD vector width, in bits, in
+ * util_cpu_caps.max_vector_bits: 128 by default, raised to 256 on
+ * x86/x86-64 builds when util_cpu_caps.has_avx is set.
+ *
+ * Leave it at 128, even when no SIMD extensions are available.
+ * The width really needs to be a multiple of 128 so that a vector
+ * can fit 4 floats.
+ */
+ util_cpu_caps.max_vector_bits = 128;
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ if (util_cpu_caps.has_avx) {
+ util_cpu_caps.max_vector_bits = 256;
+ }
+#endif
+}
+
void _util_cpu_detect_once(void);
void
check_cpu_caps_override();
+ /* max_vector_bits must be computed after the cpu caps override, because it is derived from util_cpu_caps.has_avx */
+ check_max_vector_bits();
+
get_cpu_topology();
if (debug_get_option_dump_cpu()) {
unsigned num_L3_caches;
unsigned num_cpu_mask_bits;
+ unsigned max_vector_bits;
uint16_t cpu_to_L3[UTIL_MAX_CPUS];
/* Affinity masks for each L3 cache. */