1 /* Initialize CPU feature data.
2 This file is part of the GNU C Library.
3 Copyright (C) 2008-2023 Free Software Foundation, Inc.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #include <libc-pointer-arith.h>
21 #include <get-isa-level.h>
22 #include <cacheinfo.h>
23 #include <dl-cacheinfo.h>
24 #include <dl-minsigstacksize.h>
25 #include <dl-hwcap2.h>
/* Tunable callback declarations and the Prefer_MAP_32BIT_EXEC setter.
   NOTE(review): this excerpt is incomplete -- the embedded original line
   numbers jump (27, 32, 35, 41, 43), so the attribute_hidden qualifiers,
   the static return-type line and the braces of
   set_prefer_map_32bit_exec are missing from view.  */
27 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
32 TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
35 GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
36 |= bit_arch_Prefer_MAP_32BIT_EXEC;
41 extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
43 extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
/* Mark CPU features as "active" (usable in user space): copy cpuid bits
   for features that need no OS support, then, when OSXSAVE is enabled,
   probe XCR0 via xgetbv to validate AVX/AVX512/AMX state, compute the
   xsave area size for _dl_runtime_resolve, and finish with hwcap2 and
   ISA-level derivation.
   NOTE(review): excerpt is incomplete -- embedded original line numbers
   jump (e.g. 107->109, 193->195), so braces, declarations (e.g.
   xcrlow) and some continuation lines are missing from view.  */
50 update_active (struct cpu_features *cpu_features)
52 /* Copy the cpuid bits to active bits for CPU features whose usability
53 in user space can be detected without additional OS support. */
54 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
55 CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
56 CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
57 CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
58 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
59 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
60 CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
61 CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
62 CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
63 CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
64 CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
65 CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
66 CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
67 CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
68 CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
69 CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
70 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
71 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
72 CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
73 CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
74 CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
75 CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
76 CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
77 CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
78 CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
79 CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
80 CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
81 CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
82 CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
83 CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
84 CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
85 CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
86 CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
87 CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
88 CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
89 CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
90 CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
91 CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
92 CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
93 CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
94 CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
95 CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
96 CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
97 CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
98 CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
99 CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
100 CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
101 CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
102 CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
103 CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
104 CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
105 CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
106 CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
107 CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);
/* RTM is only trusted when the CPU does not report RTM_ALWAYS_ABORT.  */
109 if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
110 CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);
113 CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
114 CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
117 /* Can we call xgetbv? */
118 if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
/* NOTE(review): the declaration of xcrlow (original line ~120) is
   missing from this excerpt.  */
121 unsigned int xcrhigh;
122 asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
123 /* Is YMM and XMM state usable? */
124 if ((xcrlow & (bit_YMM_state | bit_XMM_state))
125 == (bit_YMM_state | bit_XMM_state))
127 /* Determine if AVX is usable. */
128 if (CPU_FEATURES_CPU_P (cpu_features, AVX))
130 CPU_FEATURE_SET (cpu_features, AVX);
131 /* The following features depend on AVX being usable. */
132 /* Determine if AVX2 is usable. */
133 if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
135 CPU_FEATURE_SET (cpu_features, AVX2);
137 /* Unaligned load with 256-bit AVX registers are faster
138 on Intel/AMD processors with AVX2. */
139 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
140 |= bit_arch_AVX_Fast_Unaligned_Load;
142 /* Determine if AVX-VNNI is usable. */
143 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
144 /* Determine if FMA is usable. */
145 CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
146 /* Determine if VAES is usable. */
147 CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
148 /* Determine if VPCLMULQDQ is usable. */
149 CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
150 /* Determine if XOP is usable. */
151 CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
152 /* Determine if F16C is usable. */
153 CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
156 /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
157 ZMM16-ZMM31 state are enabled. */
158 if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
159 | bit_ZMM16_31_state))
160 == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
162 /* Determine if AVX512F is usable. */
163 if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
165 CPU_FEATURE_SET (cpu_features, AVX512F);
166 /* Determine if AVX512CD is usable. */
167 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
168 /* Determine if AVX512ER is usable. */
169 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
170 /* Determine if AVX512PF is usable. */
171 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
172 /* Determine if AVX512VL is usable. */
173 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
174 /* Determine if AVX512DQ is usable. */
175 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
176 /* Determine if AVX512BW is usable. */
177 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
178 /* Determine if AVX512_4FMAPS is usable. */
179 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
180 /* Determine if AVX512_4VNNIW is usable. */
181 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
182 /* Determine if AVX512_BITALG is usable. */
183 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
184 /* Determine if AVX512_IFMA is usable. */
185 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
186 /* Determine if AVX512_VBMI is usable. */
187 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
188 /* Determine if AVX512_VBMI2 is usable. */
189 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
190 /* Determine if AVX512_VNNI is usable. */
191 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
192 /* Determine if AVX512_VPOPCNTDQ is usable. */
/* NOTE(review): the AVX512_VPOPCNTDQ continuation line (original 194)
   is missing from this excerpt.  */
193 CPU_FEATURE_SET_ACTIVE (cpu_features,
195 /* Determine if AVX512_VP2INTERSECT is usable. */
196 CPU_FEATURE_SET_ACTIVE (cpu_features,
197 AVX512_VP2INTERSECT);
198 /* Determine if AVX512_BF16 is usable. */
199 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
200 /* Determine if AVX512_FP16 is usable. */
201 CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
206 /* Are XTILECFG and XTILEDATA states usable? */
207 if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
208 == (bit_XTILECFG_state | bit_XTILEDATA_state))
210 /* Determine if AMX_BF16 is usable. */
211 CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
212 /* Determine if AMX_TILE is usable. */
213 CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
214 /* Determine if AMX_INT8 is usable. */
215 CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
216 /* Determine if AMX_FP16 is usable. */
217 CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
220 /* These features are usable only when OSXSAVE is enabled. */
221 CPU_FEATURE_SET (cpu_features, XSAVE);
222 CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
223 CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
224 CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
225 CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);
227 /* For _dl_runtime_resolve, set xsave_state_size to xsave area
228 size + integer register save size and align it to 64 bytes. */
229 if (cpu_features->basic.max_cpuid >= 0xd)
231 unsigned int eax, ebx, ecx, edx;
233 __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
236 unsigned int xsave_state_full_size
237 = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
239 cpu_features->xsave_state_size
240 = xsave_state_full_size;
241 cpu_features->xsave_state_full_size
242 = xsave_state_full_size;
244 /* Check if XSAVEC is available. */
245 if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
/* Compute the compacted (XSAVEC) state layout: legacy region at
   offset 0, XSAVE header at 160, extended components from 576,
   with 64-byte alignment applied when ECX bit 1 is set.  */
247 unsigned int xstate_comp_offsets[32];
248 unsigned int xstate_comp_sizes[32];
251 xstate_comp_offsets[0] = 0;
252 xstate_comp_offsets[1] = 160;
253 xstate_comp_offsets[2] = 576;
254 xstate_comp_sizes[0] = 160;
255 xstate_comp_sizes[1] = 256;
257 for (i = 2; i < 32; i++)
259 if ((STATE_SAVE_MASK & (1 << i)) != 0)
261 __cpuid_count (0xd, i, eax, ebx, ecx, edx);
262 xstate_comp_sizes[i] = eax;
267 xstate_comp_sizes[i] = 0;
272 xstate_comp_offsets[i]
273 = (xstate_comp_offsets[i - 1]
274 + xstate_comp_sizes[i -1]);
275 if ((ecx & (1 << 1)) != 0)
276 xstate_comp_offsets[i]
277 = ALIGN_UP (xstate_comp_offsets[i], 64);
283 = xstate_comp_offsets[31] + xstate_comp_sizes[31];
286 cpu_features->xsave_state_size
287 = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
288 CPU_FEATURE_SET (cpu_features, XSAVEC);
295 /* Determine if PKU is usable. */
296 if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
297 CPU_FEATURE_SET (cpu_features, PKU);
299 /* Determine if Key Locker instructions are usable. */
300 if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
302 CPU_FEATURE_SET (cpu_features, AESKLE);
303 CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
304 CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
307 dl_check_hwcap2 (cpu_features);
309 cpu_features->isa_1 = get_isa_level (cpu_features);
/* Query the extended cpuid leaves (0x80000001, 0x80000007, 0x80000008)
   when the CPU reports they exist, caching each leaf's raw registers in
   cpu_features->features[].
   NOTE(review): the __cpuid_count invocation lines preceding each
   register-list group (original lines 318, 324, 330) are missing from
   this excerpt, as are the function's static return type and braces.  */
313 get_extended_indices (struct cpu_features *cpu_features)
315 unsigned int eax, ebx, ecx, edx;
316 __cpuid (0x80000000, eax, ebx, ecx, edx);
317 if (eax >= 0x80000001)
319 cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
320 cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
321 cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
322 cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
323 if (eax >= 0x80000007)
325 cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
326 cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
327 cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
328 cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
329 if (eax >= 0x80000008)
331 cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
332 cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
333 cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
334 cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
/* Query the common cpuid leaves (1, 7, 7/ecx=1, 0xd/ecx=1, 0x14, 0x19),
   cache their raw registers in cpu_features->features[], and decode
   family/model/extended_model/stepping from leaf 1 EAX.
   NOTE(review): this excerpt is incomplete -- the __cpuid/__cpuid_count
   invocation lines for leaf 1 and leaf 7 (and the family==0xf/0x6
   extended-family condition guarding line 356) are missing, along with
   braces and the static return-type line.  */
338 get_common_indices (struct cpu_features *cpu_features,
339 unsigned int *family, unsigned int *model,
340 unsigned int *extended_model, unsigned int *stepping)
346 cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
347 cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
348 cpu_features->features[CPUID_INDEX_1].cpuid.edx);
349 cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
/* Leaf 1 EAX layout: stepping [3:0], model [7:4], family [11:8],
   extended model [19:16] (shifted to [7:4] here), extended family
   [27:20].  */
350 *family = (eax >> 8) & 0x0f;
351 *model = (eax >> 4) & 0x0f;
352 *extended_model = (eax >> 12) & 0xf0;
353 *stepping = eax & 0x0f;
356 *family += (eax >> 20) & 0xff;
357 *model += *extended_model;
361 if (cpu_features->basic.max_cpuid >= 7)
364 cpu_features->features[CPUID_INDEX_7].cpuid.eax,
365 cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
366 cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
367 cpu_features->features[CPUID_INDEX_7].cpuid.edx);
369 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
370 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
371 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
372 cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
375 if (cpu_features->basic.max_cpuid >= 0xd)
376 __cpuid_count (0xd, 1,
377 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
378 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
379 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
380 cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
382 if (cpu_features->basic.max_cpuid >= 0x14)
383 __cpuid_count (0x14, 0,
384 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
385 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
386 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
387 cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);
389 if (cpu_features->basic.max_cpuid >= 0x19)
390 __cpuid_count (0x19, 0,
391 cpu_features->features[CPUID_INDEX_19].cpuid.eax,
392 cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
393 cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
394 cpu_features->features[CPUID_INDEX_19].cpuid.edx);
396 dl_check_minsigstacksize (cpu_features);
/* Several preferred-feature bits are stored in the same word; verify at
   compile time that they all share one array index so a single |= can
   set any combination of them.  */
399 _Static_assert (((index_arch_Fast_Unaligned_Load
400 == index_arch_Fast_Unaligned_Copy)
401 && (index_arch_Fast_Unaligned_Load
402 == index_arch_Prefer_PMINUB_for_stringop)
403 && (index_arch_Fast_Unaligned_Load
404 == index_arch_Slow_SSE4_2)
405 && (index_arch_Fast_Unaligned_Load
406 == index_arch_Fast_Rep_String)
407 && (index_arch_Fast_Unaligned_Load
408 == index_arch_Fast_Copy_Backward)),
409 "Incorrect index_arch_Fast_Unaligned_Load");
/* Top-level CPU feature initialization: identify the vendor from cpuid
   leaf 0, populate cached cpuid leaves, activate usable features, apply
   vendor/model-specific preference tweaks and TSX errata workarounds,
   process tunables, sanity-check XSAVE-dependent features, set
   dl_hwcap/dl_platform, and configure CET (IBT/SHSTK) state.
   NOTE(review): excerpt is incomplete -- embedded original line numbers
   jump throughout (e.g. 453->457, 519->523, 592->595), so braces,
   model-number switch/case lines and several conditions are missing
   from view.  */
412 init_cpu_features (struct cpu_features *cpu_features)
414 unsigned int ebx, ecx, edx;
415 unsigned int family = 0;
416 unsigned int model = 0;
417 unsigned int stepping = 0;
418 enum cpu_features_kind kind;
421 if (__get_cpuid_max (0, 0) == 0)
423 kind = arch_kind_other;
428 __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
430 /* This spells out "GenuineIntel". */
431 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
433 unsigned int extended_model;
435 kind = arch_kind_intel;
437 get_common_indices (cpu_features, &family, &model, &extended_model,
440 get_extended_indices (cpu_features);
442 update_active (cpu_features);
446 model += extended_model;
451 /* BSF is slow on Atom. */
452 cpu_features->preferred[index_arch_Slow_BSF]
453 |= bit_arch_Slow_BSF;
457 /* Knights Landing. Enable Silvermont optimizations. */
460 /* Unaligned load versions are faster than SSSE3
465 /* Unaligned load versions are faster than SSSE3
471 /* Airmont is a die shrink of Silvermont. */
477 /* Unaligned load versions are faster than SSSE3
479 cpu_features->preferred[index_arch_Fast_Unaligned_Load]
480 |= (bit_arch_Fast_Unaligned_Load
481 | bit_arch_Fast_Unaligned_Copy
482 | bit_arch_Prefer_PMINUB_for_stringop
483 | bit_arch_Slow_SSE4_2);
489 /* Enable rep string instructions, unaligned load, unaligned
490 copy, pminub and avoid SSE 4.2 on Tremont. */
491 cpu_features->preferred[index_arch_Fast_Rep_String]
492 |= (bit_arch_Fast_Rep_String
493 | bit_arch_Fast_Unaligned_Load
494 | bit_arch_Fast_Unaligned_Copy
495 | bit_arch_Prefer_PMINUB_for_stringop
496 | bit_arch_Slow_SSE4_2);
500 /* Unknown family 0x06 processors. Assuming this is one
501 of Core i3/i5/i7 processors if AVX is available. */
502 if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
513 /* Rep string instructions, unaligned load, unaligned copy,
514 and pminub are fast on Intel Core i3, i5 and i7. */
515 cpu_features->preferred[index_arch_Fast_Rep_String]
516 |= (bit_arch_Fast_Rep_String
517 | bit_arch_Fast_Unaligned_Load
518 | bit_arch_Fast_Unaligned_Copy
519 | bit_arch_Prefer_PMINUB_for_stringop);
523 /* Disable TSX on some processors to avoid TSX on kernels that
524 weren't updated with the latest microcode package (which
525 disables broken feature by default). */
533 /* NB: Although the errata documents that for model == 0x8e,
534 only 0xb stepping or lower are impacted, the intention of
535 the errata was to disable TSX on all client processors on
536 all steppings. Include 0xc stepping which is an Intel
537 Core i7-8665U, a client mobile processor. */
545 /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
546 processors listed in:
548 https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
551 CPU_FEATURE_UNSET (cpu_features, HLE);
552 CPU_FEATURE_UNSET (cpu_features, RTM);
553 CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
557 /* Xeon E7 v3 with stepping >= 4 has working TSX. */
564 /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
565 with stepping >= 4) to avoid TSX on kernels that weren't
566 updated with the latest microcode package (which disables
567 broken feature by default). */
568 CPU_FEATURE_UNSET (cpu_features, RTM);
574 /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
575 if AVX512ER is available. Don't use AVX512 to avoid lower CPU
576 frequency if AVX512ER isn't available. */
577 if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
578 cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
579 |= bit_arch_Prefer_No_VZEROUPPER;
582 /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
583 when ZMM load and store instructions are used. */
584 if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
585 cpu_features->preferred[index_arch_Prefer_No_AVX512]
586 |= bit_arch_Prefer_No_AVX512;
588 /* Avoid RTM abort triggered by VZEROUPPER inside a
589 transactionally executing RTM region. */
590 if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
591 cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
592 |= bit_arch_Prefer_No_VZEROUPPER;
595 /* Avoid short distance REP MOVSB on processor with FSRM. */
596 if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
597 cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
598 |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
600 /* This spells out "AuthenticAMD" or "HygonGenuine". */
601 else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
602 || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
604 unsigned int extended_model;
606 kind = arch_kind_amd;
608 get_common_indices (cpu_features, &family, &model, &extended_model,
611 get_extended_indices (cpu_features);
613 update_active (cpu_features);
615 ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
617 if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
619 /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
620 FMA4 requires AVX, determine if FMA4 is usable here. */
621 CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
627 if (model >= 0x60 && model <= 0x7f)
629 cpu_features->preferred[index_arch_Fast_Unaligned_Load]
630 |= (bit_arch_Fast_Unaligned_Load
631 | bit_arch_Fast_Copy_Backward);
633 /* Unaligned AVX loads are slower.*/
634 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
635 &= ~bit_arch_AVX_Fast_Unaligned_Load;
639 /* This spells out "CentaurHauls" or " Shanghai ". */
640 else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
641 || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
643 unsigned int extended_model, stepping;
645 kind = arch_kind_zhaoxin;
647 get_common_indices (cpu_features, &family, &model, &extended_model,
650 get_extended_indices (cpu_features);
652 update_active (cpu_features);
654 model += extended_model;
657 if (model == 0xf || model == 0x19)
659 CPU_FEATURE_UNSET (cpu_features, AVX);
660 CPU_FEATURE_UNSET (cpu_features, AVX2);
662 cpu_features->preferred[index_arch_Slow_SSE4_2]
663 |= bit_arch_Slow_SSE4_2;
665 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
666 &= ~bit_arch_AVX_Fast_Unaligned_Load;
669 else if (family == 0x7)
673 CPU_FEATURE_UNSET (cpu_features, AVX);
674 CPU_FEATURE_UNSET (cpu_features, AVX2);
676 cpu_features->preferred[index_arch_Slow_SSE4_2]
677 |= bit_arch_Slow_SSE4_2;
679 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
680 &= ~bit_arch_AVX_Fast_Unaligned_Load;
682 else if (model == 0x3b)
684 CPU_FEATURE_UNSET (cpu_features, AVX);
685 CPU_FEATURE_UNSET (cpu_features, AVX2);
687 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
688 &= ~bit_arch_AVX_Fast_Unaligned_Load;
694 kind = arch_kind_other;
695 get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
696 update_active (cpu_features);
699 /* Support i586 if CX8 is available. */
700 if (CPU_FEATURES_CPU_P (cpu_features, CX8))
701 cpu_features->preferred[index_arch_I586] |= bit_arch_I586;
703 /* Support i686 if CMOV is available. */
704 if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
705 cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
711 cpu_features->basic.kind = kind;
712 cpu_features->basic.family = family;
713 cpu_features->basic.model = model;
714 cpu_features->basic.stepping = stepping;
716 dl_init_cacheinfo (cpu_features);
718 TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
721 TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
722 TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
725 bool disable_xsave_features = false;
727 if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
729 /* These features are usable only if OSXSAVE is usable. */
730 CPU_FEATURE_UNSET (cpu_features, XSAVE);
731 CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
732 CPU_FEATURE_UNSET (cpu_features, XSAVEC);
733 CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
734 CPU_FEATURE_UNSET (cpu_features, XFD);
736 disable_xsave_features = true;
739 if (disable_xsave_features
740 || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
741 && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
743 /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable. */
744 cpu_features->xsave_state_size = 0;
/* Without XSAVE the kernel cannot preserve extended register state,
   so every XSAVE-dependent feature must be turned off.  */
746 CPU_FEATURE_UNSET (cpu_features, AVX);
747 CPU_FEATURE_UNSET (cpu_features, AVX2);
748 CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
749 CPU_FEATURE_UNSET (cpu_features, FMA);
750 CPU_FEATURE_UNSET (cpu_features, VAES);
751 CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
752 CPU_FEATURE_UNSET (cpu_features, XOP);
753 CPU_FEATURE_UNSET (cpu_features, F16C);
754 CPU_FEATURE_UNSET (cpu_features, AVX512F);
755 CPU_FEATURE_UNSET (cpu_features, AVX512CD);
756 CPU_FEATURE_UNSET (cpu_features, AVX512ER);
757 CPU_FEATURE_UNSET (cpu_features, AVX512PF);
758 CPU_FEATURE_UNSET (cpu_features, AVX512VL);
759 CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
760 CPU_FEATURE_UNSET (cpu_features, AVX512BW);
761 CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
762 CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
763 CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
764 CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
765 CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
766 CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
767 CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
768 CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
769 CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
770 CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
771 CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
772 CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
773 CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
774 CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
776 CPU_FEATURE_UNSET (cpu_features, FMA4);
780 GLRO(dl_hwcap) = HWCAP_X86_64;
781 if (cpu_features->basic.kind == arch_kind_intel)
783 const char *platform = NULL;
785 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
787 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
789 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
790 platform = "xeon_phi";
794 if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
795 && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
796 && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
797 GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
/* NOTE(review): the condition's first operand (original lines
   798-801, presumably platform == NULL plus an AVX check) is
   missing from this excerpt.  */
802 && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
803 && CPU_FEATURE_USABLE_P (cpu_features, FMA)
804 && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
805 && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
806 && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
807 && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
808 && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
809 platform = "haswell";
811 if (platform != NULL)
812 GLRO(dl_platform) = platform;
816 if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
817 GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
819 if (CPU_FEATURES_ARCH_P (cpu_features, I686))
820 GLRO(dl_platform) = "i686";
821 else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
822 GLRO(dl_platform) = "i586";
826 TUNABLE_GET (x86_ibt, tunable_val_t *,
827 TUNABLE_CALLBACK (set_x86_ibt));
828 TUNABLE_GET (x86_shstk, tunable_val_t *,
829 TUNABLE_CALLBACK (set_x86_shstk));
831 /* Check CET status. */
832 unsigned int cet_status = get_cet_status ();
834 if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
835 CPU_FEATURE_UNSET (cpu_features, IBT)
836 if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
837 CPU_FEATURE_UNSET (cpu_features, SHSTK)
841 GL(dl_x86_feature_1) = cet_status;
844 /* Check if IBT and SHSTK are enabled by kernel. */
845 if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
846 || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
848 /* Disable IBT and/or SHSTK if they are enabled by kernel, but
849 disabled by environment variable:
851 GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
853 unsigned int cet_feature = 0;
854 if (!CPU_FEATURE_USABLE (IBT))
855 cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
856 if (!CPU_FEATURE_USABLE (SHSTK))
857 cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
861 int res = dl_cet_disable_cet (cet_feature);
863 /* Clear the disabled bits in dl_x86_feature_1. */
865 GL(dl_x86_feature_1) &= ~cet_feature;
868 /* Lock CET if IBT or SHSTK is enabled in executable. Don't
869 lock CET if IBT or SHSTK is enabled permissively. */
870 if (GL(dl_x86_feature_control).ibt != cet_permissive
871 && GL(dl_x86_feature_control).shstk != cet_permissive)
879 /* NB: In libc.a, call init_cacheinfo. */