6c1b5efc5f4b48f05d0d28b4b3c14da801be3f50
[platform/upstream/glibc.git] / sysdeps / x86 / cpu-features.c
/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>
#include <dl-hwcap2.h>

extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

#ifdef __LP64__
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
#endif

#if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;

# include <dl-cet.h>
#endif

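/* Determine which features are usable ("active") from the raw CPUID bits,
   together with the OS support probed below (XSAVE-enabled register state,
   CET status, hwcap2).  */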
static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

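  /* RTM_ALWAYS_ABORT means RTM transactions always abort, so only mark
     RTM as active when it isn't set.  */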
  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
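      /* XGETBV with ECX == 0 reads XCR0, which reports the register
         states the OS has enabled for saving with XSAVE.  */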
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
          /* Determine if AMX_FP16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
        }

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
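          /* CPUID leaf 0xd, sub-leaf 0: EBX is the XSAVE area size needed
             for the state components currently enabled in XCR0.  */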
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

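                  /* Compute the compacted (XSAVEC) layout: the legacy
                     x87/SSE area plus XSAVE header occupy the first 576
                     bytes; each enabled extended component follows with
                     the size given by CPUID leaf 0xd, sub-leaf i, and is
                     64-byte aligned when ECX bit 1 is set.  */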
                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  dl_check_hwcap2 (cpu_features);

  cpu_features->isa_1 = get_isa_level (cpu_features);
}

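/* Read the extended CPUID leaves (0x80000001, 0x80000007 and 0x80000008)
   that the processor reports via leaf 0x80000000.  */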
static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
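      /* CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits 7:4,
         family in bits 11:8, extended model in bits 19:16 and extended
         family in bits 27:20.  The extended model is extracted already
         shifted to bits 7:4 so it can simply be added to the model.  */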
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}

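/* The per-CPU tuning code in init_cpu_features below sets several of these
   preferred[] bits with a single OR, so they must all live in the same
   preferred[] element.  */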
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

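  /* CPUID leaf 0 returns the highest supported leaf in EAX and the 12-byte
     vendor string in EBX, EDX and ECX (in that order).  */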
  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
        {
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */

            case 0x7a:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont Plus.  */

            case 0x5c:
            case 0x5f:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont.  */

            case 0x4c:
            case 0x5a:
            case 0x75:
              /* Airmont is a die shrink of Silvermont.  */

            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            case 0x86:
            case 0x96:
            case 0x9c:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one of
                 the Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;
              /* Fall through.  */

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            }

          /* Disable TSX on some processors to avoid TSX on kernels that
             weren't updated with the latest microcode package (which
             disables the broken feature by default).  */
          switch (model)
            {
            case 0x55:
              if (stepping <= 5)
                goto disable_tsx;
              break;
            case 0x8e:
              /* NB: Although the errata documents that only steppings 0xb
                 and lower are impacted for model == 0x8e, the intention of
                 the errata was to disable TSX on all client processors on
                 all steppings.  Include the 0xc stepping, which is an
                 Intel Core i7-8665U, a client mobile processor.  */
            case 0x9e:
              if (stepping > 0xc)
                break;
              /* Fall through.  */
            case 0x4e:
            case 0x5e:
              {
                /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
                   processors listed in:

https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
                 */
disable_tsx:
                CPU_FEATURE_UNSET (cpu_features, HLE);
                CPU_FEATURE_UNSET (cpu_features, RTM);
                CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
              }
              break;
            case 0x3f:
              /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
              if (stepping >= 4)
                break;
              /* Fall through.  */
            case 0x3c:
            case 0x45:
            case 0x46:
              /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
                 with stepping >= 4) to avoid TSX on kernels that weren't
                 updated with the latest microcode package (which disables
                 the broken feature by default).  */
              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  If AVX512ER isn't available, avoid
         AVX512 so as not to lower the CPU frequency.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
             when ZMM load and store instructions are used.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
            cpu_features->preferred[index_arch_Prefer_No_AVX512]
              |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
        }

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
          {
            cpu_features->preferred[index_arch_Fast_Unaligned_Load]
              |= (bit_arch_Fast_Unaligned_Load
                  | bit_arch_Fast_Copy_Backward);

            /* Unaligned AVX loads are slower.  */
            cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
              &= ~bit_arch_AVX_Fast_Unaligned_Load;
          }
        }
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

#ifdef __LP64__
  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
#endif

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if neither XSAVE nor XSAVEC is usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

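  /* Set dl_hwcap and dl_platform, which the dynamic loader uses when
     searching the platform- and hwcap-specific library directories.  */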
#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT)
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK)

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by the kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
          || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
        {
          /* Disable IBT and/or SHSTK if they are enabled by the kernel
             but disabled by the environment variable:

             GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
           */
          unsigned int cet_feature = 0;
          if (!CPU_FEATURE_USABLE (IBT))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
          if (!CPU_FEATURE_USABLE (SHSTK))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

          if (cet_feature)
            {
              int res = dl_cet_disable_cet (cet_feature);

              /* Clear the disabled bits in dl_x86_feature_1.  */
              if (res == 0)
                GL(dl_x86_feature_1) &= ~cet_feature;
            }

          /* Lock CET if IBT or SHSTK is enabled in the executable.  Don't
             lock CET if IBT or SHSTK is enabled permissively.  */
          if (GL(dl_x86_feature_control).ibt != cet_permissive
              && GL(dl_x86_feature_control).shstk != cet_permissive)
            dl_cet_lock_cet ();
        }
# endif
    }
#endif

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}