elf: Add ELF_DYNAMIC_AFTER_RELOC to rewrite PLT
[platform/upstream/glibc.git] sysdeps/x86/cpu-features.c
1 /* Initialize CPU feature data.
2    This file is part of the GNU C Library.
3    Copyright (C) 2008-2024 Free Software Foundation, Inc.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18
19 #include <dl-hwcap.h>
20 #include <libc-pointer-arith.h>
21 #include <get-isa-level.h>
22 #include <cacheinfo.h>
23 #include <dl-cacheinfo.h>
24 #include <dl-minsigstacksize.h>
25 #include <dl-hwcap2.h>
26
27 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
28   attribute_hidden;
29
30 #ifdef SHARED
31 static void
32 TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
33 {
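  /* As the checks below imply, the glibc.cpu.plt_rewrite tunable selects:
     0 = leave the PLT alone, 1 = rewrite PLT entries with a 32-bit direct
     JMP, 2 = additionally allow the 64-bit absolute JMPABS encoding on APX
     processors, e.g. GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2.  */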
34   if (valp->numval != 0)
35     {
36       /* Use JMPABS only on APX processors.  */
37       const struct cpu_features *cpu_features = __get_cpu_features ();
38       GL (dl_x86_feature_control).plt_rewrite
39           = ((valp->numval > 1 && CPU_FEATURE_PRESENT_P (cpu_features, APX_F))
40                  ? plt_rewrite_jmpabs
41                  : plt_rewrite_jmp);
42     }
43 }
44 #endif
45
46 #ifdef __LP64__
47 static void
48 TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
49 {
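  /* Set via the glibc.cpu.prefer_map_32bit_exec tunable: when enabled, the
     dynamic loader prefers MAP_32BIT for executable mappings, at the cost
     of reduced address space randomization.  */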
50   if (valp->numval)
51     GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
52       |= bit_arch_Prefer_MAP_32BIT_EXEC;
53 }
54 #endif
55
56 #if CET_ENABLED
57 extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
58   attribute_hidden;
59 extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
60   attribute_hidden;
61
62 # include <dl-cet.h>
63 #endif
64
65 static void
66 update_active (struct cpu_features *cpu_features)
67 {
68   /* Copy the cpuid bits to active bits for CPU features whose usability
69      in user space can be detected without additional OS support.  */
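  /* CPU_FEATURE_SET_ACTIVE copies a feature's CPUID bit into its active
     (usable) bit, so it marks the feature usable only when the CPU reports
     it; CPU_FEATURE_SET, used further below, sets the active bit directly
     once the additional OS-level requirement has been verified.  */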
70   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
71   CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
72   CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
73   CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
74   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
75   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
76   CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
77   CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
78   CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
79   CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
80   CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
81   CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
82   CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
83   CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
84   CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
85   CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
86   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
87   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
88   CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
89   CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
90   CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
91   CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
92   CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
93   CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
94   CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
95   CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
96   CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
97   CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
98   CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
99   CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
100   CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
101   CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
102   CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
103   CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
104   CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
105   CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
106   CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
107   CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
108   CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
109   CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
110   CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
111   CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
112   CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
113   CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
114   CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
115   CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
116   CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
117   CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
118   CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
119   CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
120   CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
121   CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
122   CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
123   CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
124   CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);
125
126   if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
127     CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);
128
129 #if CET_ENABLED && 0
130   CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
131   CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
132 #endif
133
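  /* Track which vector register states the OS has enabled: XMM is assumed,
     YMM and ZMM are added after the XCR0 checks below, and the result gates
     which AVX10 vector lengths are marked usable.  */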
134   enum
135   {
136     os_xmm = 1,
137     os_ymm = 2,
138     os_zmm = 4
139   } os_vector_size = os_xmm;
140   /* Can we call xgetbv?  */
141   if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
142     {
143       unsigned int xcrlow;
144       unsigned int xcrhigh;
145       CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
146       asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
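      /* xgetbv with ECX == 0 reads XCR0, the mask of XSAVE states the OS
         has enabled, returned in EDX:EAX.  */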
147       /* Is YMM and XMM state usable?  */
148       if ((xcrlow & (bit_YMM_state | bit_XMM_state))
149           == (bit_YMM_state | bit_XMM_state))
150         {
151           /* Determine if AVX is usable.  */
152           if (CPU_FEATURES_CPU_P (cpu_features, AVX))
153             {
154               os_vector_size |= os_ymm;
155               CPU_FEATURE_SET (cpu_features, AVX);
156               /* The following features depend on AVX being usable.  */
157               /* Determine if AVX2 is usable.  */
158               if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
159                 {
160                   CPU_FEATURE_SET (cpu_features, AVX2);
161
162                   /* Unaligned loads with 256-bit AVX registers are faster
163                      on Intel/AMD processors with AVX2.  */
164                   cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
165                     |= bit_arch_AVX_Fast_Unaligned_Load;
166                 }
167               /* Determine if AVX-IFMA is usable.  */
168               CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
169               /* Determine if AVX-NE-CONVERT is usable.  */
170               CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
171               /* Determine if AVX-VNNI is usable.  */
172               CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
173               /* Determine if AVX-VNNI-INT8 is usable.  */
174               CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
175               /* Determine if FMA is usable.  */
176               CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
177               /* Determine if VAES is usable.  */
178               CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
179               /* Determine if VPCLMULQDQ is usable.  */
180               CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
181               /* Determine if XOP is usable.  */
182               CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
183               /* Determine if F16C is usable.  */
184               CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
185             }
186
187           /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
188              ZMM16-ZMM31 state are enabled.  */
189           if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
190                          | bit_ZMM16_31_state))
191               == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
192             {
193               os_vector_size |= os_zmm;
194               /* Determine if AVX512F is usable.  */
195               if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
196                 {
197                   CPU_FEATURE_SET (cpu_features, AVX512F);
198                   /* Determine if AVX512CD is usable.  */
199                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
200                   /* Determine if AVX512ER is usable.  */
201                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
202                   /* Determine if AVX512PF is usable.  */
203                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
204                   /* Determine if AVX512VL is usable.  */
205                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
206                   /* Determine if AVX512DQ is usable.  */
207                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
208                   /* Determine if AVX512BW is usable.  */
209                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
210                   /* Determine if AVX512_4FMAPS is usable.  */
211                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
212                   /* Determine if AVX512_4VNNIW is usable.  */
213                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
214                   /* Determine if AVX512_BITALG is usable.  */
215                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
216                   /* Determine if AVX512_IFMA is usable.  */
217                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
218                   /* Determine if AVX512_VBMI is usable.  */
219                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
220                   /* Determine if AVX512_VBMI2 is usable.  */
221                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
222                   /* Determine if AVX512_VNNI is usable.  */
223                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
224                   /* Determine if AVX512_VPOPCNTDQ is usable.  */
225                   CPU_FEATURE_SET_ACTIVE (cpu_features,
226                                           AVX512_VPOPCNTDQ);
227                   /* Determine if AVX512_VP2INTERSECT is usable.  */
228                   CPU_FEATURE_SET_ACTIVE (cpu_features,
229                                           AVX512_VP2INTERSECT);
230                   /* Determine if AVX512_BF16 is usable.  */
231                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
232                   /* Determine if AVX512_FP16 is usable.  */
233                   CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
234                 }
235             }
236         }
237
238       if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
239           && cpu_features->basic.max_cpuid >= 0x24)
240         {
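          /* CPUID leaf 0x24, subleaf 0, enumerates the AVX10 converged
             vector ISA: the supported AVX10 version and vector lengths.
             The per-length feature bits read here are only activated below
             for the vector widths the OS has actually enabled.  */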
241           __cpuid_count (
242               0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
243               cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
244               cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
245               cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
246           if (os_vector_size & os_xmm)
247             CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
248           if (os_vector_size & os_ymm)
249             CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
250           if (os_vector_size & os_zmm)
251             CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
252         }
253
254       /* Are XTILECFG and XTILEDATA states usable?  */
255       if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
256           == (bit_XTILECFG_state | bit_XTILEDATA_state))
257         {
258           /* Determine if AMX_BF16 is usable.  */
259           CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
260           /* Determine if AMX_TILE is usable.  */
261           CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
262           /* Determine if AMX_INT8 is usable.  */
263           CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
264           /* Determine if AMX_FP16 is usable.  */
265           CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
266           /* Determine if AMX_COMPLEX is usable.  */
267           CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
268         }
269
270       /* APX is usable only if the APX state is supported by the kernel.  */
271       if ((xcrlow & bit_APX_state) != 0)
272         CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);
273
274       /* These features are usable only when OSXSAVE is enabled.  */
275       CPU_FEATURE_SET (cpu_features, XSAVE);
276       CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
277       CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
278       CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
279       CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);
280
281       /* For _dl_runtime_resolve, set xsave_state_size to xsave area
282          size + integer register save size and align it to 64 bytes.  */
283       if (cpu_features->basic.max_cpuid >= 0xd)
284         {
285           unsigned int eax, ebx, ecx, edx;
286
287           __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
288           if (ebx != 0)
289             {
290               unsigned int xsave_state_full_size
291                 = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
292
293               cpu_features->xsave_state_size
294                 = xsave_state_full_size;
295               cpu_features->xsave_state_full_size
296                 = xsave_state_full_size;
297
298               /* Check if XSAVEC is available.  */
299               if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
300                 {
301                   unsigned int xstate_comp_offsets[32];
302                   unsigned int xstate_comp_sizes[32];
303                   unsigned int i;
304
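                  /* In the compacted XSAVE format the legacy region keeps
                     its fixed layout: x87 state at offset 0 (160 bytes
                     used), XMM registers at offset 160 (256 bytes), and the
                     first extended component starts at 576, right after the
                     512-byte legacy area and the 64-byte XSAVE header.  */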
305                   xstate_comp_offsets[0] = 0;
306                   xstate_comp_offsets[1] = 160;
307                   xstate_comp_offsets[2] = 576;
308                   xstate_comp_sizes[0] = 160;
309                   xstate_comp_sizes[1] = 256;
310
311                   for (i = 2; i < 32; i++)
312                     {
313                       if ((STATE_SAVE_MASK & (1 << i)) != 0)
314                         {
315                           __cpuid_count (0xd, i, eax, ebx, ecx, edx);
316                           xstate_comp_sizes[i] = eax;
317                         }
318                       else
319                         {
320                           ecx = 0;
321                           xstate_comp_sizes[i] = 0;
322                         }
323
324                       if (i > 2)
325                         {
326                           xstate_comp_offsets[i]
327                             = (xstate_comp_offsets[i - 1]
328                                + xstate_comp_sizes[i - 1]);
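                          /* ECX bit 1 of CPUID leaf 0xd, subleaf i, set
                             means this component must be 64-byte aligned
                             in the compacted format.  */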
329                           if ((ecx & (1 << 1)) != 0)
330                             xstate_comp_offsets[i]
331                               = ALIGN_UP (xstate_comp_offsets[i], 64);
332                         }
333                     }
334
335                   /* Use XSAVEC.  */
336                   unsigned int size
337                     = xstate_comp_offsets[31] + xstate_comp_sizes[31];
338                   if (size)
339                     {
340                       cpu_features->xsave_state_size
341                         = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
342                       CPU_FEATURE_SET (cpu_features, XSAVEC);
343                     }
344                 }
345             }
346         }
347     }
348
349   /* Determine if PKU is usable.  */
350   if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
351     CPU_FEATURE_SET (cpu_features, PKU);
352
353   /* Determine if Key Locker instructions are usable.  */
354   if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
355     {
356       CPU_FEATURE_SET (cpu_features, AESKLE);
357       CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
358       CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
359     }
360
361   dl_check_hwcap2 (cpu_features);
362
363   cpu_features->isa_1 = get_isa_level (cpu_features);
364 }
365
366 static void
367 get_extended_indices (struct cpu_features *cpu_features)
368 {
369   unsigned int eax, ebx, ecx, edx;
370   __cpuid (0x80000000, eax, ebx, ecx, edx);
371   if (eax >= 0x80000001)
372     __cpuid (0x80000001,
373              cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
374              cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
375              cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
376              cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
377   if (eax >= 0x80000007)
378     __cpuid (0x80000007,
379              cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
380              cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
381              cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
382              cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
383   if (eax >= 0x80000008)
384     __cpuid (0x80000008,
385              cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
386              cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
387              cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
388              cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
389 }
390
391 static void
392 get_common_indices (struct cpu_features *cpu_features,
393                     unsigned int *family, unsigned int *model,
394                     unsigned int *extended_model, unsigned int *stepping)
395 {
396   if (family)
397     {
398       unsigned int eax;
399       __cpuid (1, eax,
400                cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
401                cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
402                cpu_features->features[CPUID_INDEX_1].cpuid.edx);
403       cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
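      /* CPUID leaf 1 EAX: stepping in bits 3:0, model in bits 7:4, family
         in bits 11:8, extended model in bits 19:16 and extended family in
         bits 27:20.  The extended model is shifted into bits 7:4 so it can
         be added directly to the base model; it is only meaningful for
         family 0x6 (added by the callers) and family 0xf (handled below
         together with the extended family).  */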
404       *family = (eax >> 8) & 0x0f;
405       *model = (eax >> 4) & 0x0f;
406       *extended_model = (eax >> 12) & 0xf0;
407       *stepping = eax & 0x0f;
408       if (*family == 0x0f)
409         {
410           *family += (eax >> 20) & 0xff;
411           *model += *extended_model;
412         }
413     }
414
415   if (cpu_features->basic.max_cpuid >= 7)
416     {
417       __cpuid_count (7, 0,
418                      cpu_features->features[CPUID_INDEX_7].cpuid.eax,
419                      cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
420                      cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
421                      cpu_features->features[CPUID_INDEX_7].cpuid.edx);
422       __cpuid_count (7, 1,
423                      cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
424                      cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
425                      cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
426                      cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
427     }
428
429   if (cpu_features->basic.max_cpuid >= 0xd)
430     __cpuid_count (0xd, 1,
431                    cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
432                    cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
433                    cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
434                    cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
435
436   if (cpu_features->basic.max_cpuid >= 0x14)
437     __cpuid_count (0x14, 0,
438                    cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
439                    cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
440                    cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
441                    cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);
442
443   if (cpu_features->basic.max_cpuid >= 0x19)
444     __cpuid_count (0x19, 0,
445                    cpu_features->features[CPUID_INDEX_19].cpuid.eax,
446                    cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
447                    cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
448                    cpu_features->features[CPUID_INDEX_19].cpuid.edx);
449
450   dl_check_minsigstacksize (cpu_features);
451 }
452
453 _Static_assert (((index_arch_Fast_Unaligned_Load
454                   == index_arch_Fast_Unaligned_Copy)
455                  && (index_arch_Fast_Unaligned_Load
456                      == index_arch_Prefer_PMINUB_for_stringop)
457                  && (index_arch_Fast_Unaligned_Load
458                      == index_arch_Slow_SSE4_2)
459                  && (index_arch_Fast_Unaligned_Load
460                      == index_arch_Fast_Rep_String)
461                  && (index_arch_Fast_Unaligned_Load
462                      == index_arch_Fast_Copy_Backward)),
463                 "Incorrect index_arch_Fast_Unaligned_Load");
464
465
466 /* Intel Family-6 microarch list.  */
467 enum
468 {
469   /* Atom processors.  */
470   INTEL_ATOM_BONNELL,
471   INTEL_ATOM_SILVERMONT,
472   INTEL_ATOM_AIRMONT,
473   INTEL_ATOM_GOLDMONT,
474   INTEL_ATOM_GOLDMONT_PLUS,
475   INTEL_ATOM_SIERRAFOREST,
476   INTEL_ATOM_GRANDRIDGE,
477   INTEL_ATOM_TREMONT,
478
479   /* Bigcore processors.  */
480   INTEL_BIGCORE_MEROM,
481   INTEL_BIGCORE_PENRYN,
482   INTEL_BIGCORE_DUNNINGTON,
483   INTEL_BIGCORE_NEHALEM,
484   INTEL_BIGCORE_WESTMERE,
485   INTEL_BIGCORE_SANDYBRIDGE,
486   INTEL_BIGCORE_IVYBRIDGE,
487   INTEL_BIGCORE_HASWELL,
488   INTEL_BIGCORE_BROADWELL,
489   INTEL_BIGCORE_SKYLAKE,
490   INTEL_BIGCORE_KABYLAKE,
491   INTEL_BIGCORE_COMETLAKE,
492   INTEL_BIGCORE_SKYLAKE_AVX512,
493   INTEL_BIGCORE_CANNONLAKE,
494   INTEL_BIGCORE_ICELAKE,
495   INTEL_BIGCORE_TIGERLAKE,
496   INTEL_BIGCORE_ROCKETLAKE,
497   INTEL_BIGCORE_SAPPHIRERAPIDS,
498   INTEL_BIGCORE_RAPTORLAKE,
499   INTEL_BIGCORE_EMERALDRAPIDS,
500   INTEL_BIGCORE_METEORLAKE,
501   INTEL_BIGCORE_LUNARLAKE,
502   INTEL_BIGCORE_ARROWLAKE,
503   INTEL_BIGCORE_GRANITERAPIDS,
504
505   /* Mixed (bigcore + atom SOC).  */
506   INTEL_MIXED_LAKEFIELD,
507   INTEL_MIXED_ALDERLAKE,
508
509   /* KNL.  */
510   INTEL_KNIGHTS_MILL,
511   INTEL_KNIGHTS_LANDING,
512
513   /* Unknown.  */
514   INTEL_UNKNOWN,
515 };
516
517 static unsigned int
518 intel_get_fam6_microarch (unsigned int model,
519                           __attribute__ ((unused)) unsigned int stepping)
520 {
521   switch (model)
522     {
523     case 0x1C:
524     case 0x26:
525       return INTEL_ATOM_BONNELL;
526     case 0x27:
527     case 0x35:
528     case 0x36:
529       /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
530          (microarchitecturally identical).  */
531       return INTEL_ATOM_BONNELL;
532     case 0x37:
533     case 0x4A:
534     case 0x4D:
535     case 0x5D:
536       return INTEL_ATOM_SILVERMONT;
537     case 0x4C:
538     case 0x5A:
539     case 0x75:
540       return INTEL_ATOM_AIRMONT;
541     case 0x5C:
542     case 0x5F:
543       return INTEL_ATOM_GOLDMONT;
544     case 0x7A:
545       return INTEL_ATOM_GOLDMONT_PLUS;
546     case 0xAF:
547       return INTEL_ATOM_SIERRAFOREST;
548     case 0xB6:
549       return INTEL_ATOM_GRANDRIDGE;
550     case 0x86:
551     case 0x96:
552     case 0x9C:
553       return INTEL_ATOM_TREMONT;
554     case 0x0F:
555     case 0x16:
556       return INTEL_BIGCORE_MEROM;
557     case 0x17:
558       return INTEL_BIGCORE_PENRYN;
559     case 0x1D:
560       return INTEL_BIGCORE_DUNNINGTON;
561     case 0x1A:
562     case 0x1E:
563     case 0x1F:
564     case 0x2E:
565       return INTEL_BIGCORE_NEHALEM;
566     case 0x25:
567     case 0x2C:
568     case 0x2F:
569       return INTEL_BIGCORE_WESTMERE;
570     case 0x2A:
571     case 0x2D:
572       return INTEL_BIGCORE_SANDYBRIDGE;
573     case 0x3A:
574     case 0x3E:
575       return INTEL_BIGCORE_IVYBRIDGE;
576     case 0x3C:
577     case 0x3F:
578     case 0x45:
579     case 0x46:
580       return INTEL_BIGCORE_HASWELL;
581     case 0x3D:
582     case 0x47:
583     case 0x4F:
584     case 0x56:
585       return INTEL_BIGCORE_BROADWELL;
586     case 0x4E:
587     case 0x5E:
588       return INTEL_BIGCORE_SKYLAKE;
589     case 0x8E:
590     /*
591      Stepping = {9}
592         -> Amberlake
593      Stepping = {10}
594         -> Coffeelake
595      Stepping = {11, 12}
596         -> Whiskeylake
597      else
598         -> Kabylake
599
600      All of these are derivatives of Kabylake (Skylake client).
601      */
602       return INTEL_BIGCORE_KABYLAKE;
603     case 0x9E:
604     /*
605      Stepping = {10, 11, 12, 13}
606         -> Coffeelake
607      else
608         -> Kabylake
609
610      Coffeelake is a derivative of Kabylake (Skylake client).
611      */
612       return INTEL_BIGCORE_KABYLAKE;
613     case 0xA5:
614     case 0xA6:
615       return INTEL_BIGCORE_COMETLAKE;
616     case 0x66:
617       return INTEL_BIGCORE_CANNONLAKE;
618     case 0x55:
619     /*
620      Stepping = {6, 7}
621         -> Cascadelake
622      Stepping = {11}
623         -> Cooperlake
624      else
625         -> Skylake-avx512
626
627      These are all microarchitecturally identical, so use
628      Skylake-avx512 for all of them.
629      */
630       return INTEL_BIGCORE_SKYLAKE_AVX512;
631     case 0x6A:
632     case 0x6C:
633     case 0x7D:
634     case 0x7E:
635     case 0x9D:
636       return INTEL_BIGCORE_ICELAKE;
637     case 0x8C:
638     case 0x8D:
639       return INTEL_BIGCORE_TIGERLAKE;
640     case 0xA7:
641       return INTEL_BIGCORE_ROCKETLAKE;
642     case 0x8F:
643       return INTEL_BIGCORE_SAPPHIRERAPIDS;
644     case 0xB7:
645     case 0xBA:
646     case 0xBF:
647       return INTEL_BIGCORE_RAPTORLAKE;
648     case 0xCF:
649       return INTEL_BIGCORE_EMERALDRAPIDS;
650     case 0xAA:
651     case 0xAC:
652       return INTEL_BIGCORE_METEORLAKE;
653     case 0xBD:
654       return INTEL_BIGCORE_LUNARLAKE;
655     case 0xC6:
656       return INTEL_BIGCORE_ARROWLAKE;
657     case 0xAD:
658     case 0xAE:
659       return INTEL_BIGCORE_GRANITERAPIDS;
660     case 0x8A:
661       return INTEL_MIXED_LAKEFIELD;
662     case 0x97:
663     case 0x9A:
664     case 0xBE:
665       return INTEL_MIXED_ALDERLAKE;
666     case 0x85:
667       return INTEL_KNIGHTS_MILL;
668     case 0x57:
669       return INTEL_KNIGHTS_LANDING;
670     default:
671       return INTEL_UNKNOWN;
672     }
673 }
674
675 static inline void
676 init_cpu_features (struct cpu_features *cpu_features)
677 {
678   unsigned int ebx, ecx, edx;
679   unsigned int family = 0;
680   unsigned int model = 0;
681   unsigned int stepping = 0;
682   enum cpu_features_kind kind;
683
684   cpu_features->cachesize_non_temporal_divisor = 4;
685 #if !HAS_CPUID
686   if (__get_cpuid_max (0, 0) == 0)
687     {
688       kind = arch_kind_other;
689       goto no_cpuid;
690     }
691 #endif
692
693   __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
694
695   /* This spells out "GenuineIntel".  */
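  /* CPUID leaf 0 returns the vendor string in EBX:EDX:ECX; the
     little-endian constants decode to "Genu" (EBX), "ineI" (EDX) and
     "ntel" (ECX).  */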
696   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
697     {
698       unsigned int extended_model;
699
700       kind = arch_kind_intel;
701
702       get_common_indices (cpu_features, &family, &model, &extended_model,
703                           &stepping);
704
705       get_extended_indices (cpu_features);
706
707       update_active (cpu_features);
708
709       if (family == 0x06)
710         {
711           model += extended_model;
712           unsigned int microarch
713               = intel_get_fam6_microarch (model, stepping);
714
715           switch (microarch)
716             {
717               /* Atom / KNL tuning.  */
718             case INTEL_ATOM_BONNELL:
719               /* BSF is slow on Bonnell.  */
720               cpu_features->preferred[index_arch_Slow_BSF]
721                   |= bit_arch_Slow_BSF;
722               break;
723
724               /* Unaligned load versions are faster than SSSE3
725                  on Airmont, Silvermont, Goldmont, and Goldmont Plus.  */
726             case INTEL_ATOM_AIRMONT:
727             case INTEL_ATOM_SILVERMONT:
728             case INTEL_ATOM_GOLDMONT:
729             case INTEL_ATOM_GOLDMONT_PLUS:
730
731               /* Knights Landing.  Enable Silvermont optimizations.  */
732             case INTEL_KNIGHTS_LANDING:
733
734               cpu_features->preferred[index_arch_Fast_Unaligned_Load]
735                   |= (bit_arch_Fast_Unaligned_Load
736                       | bit_arch_Fast_Unaligned_Copy
737                       | bit_arch_Prefer_PMINUB_for_stringop
738                       | bit_arch_Slow_SSE4_2);
739               break;
740
741             case INTEL_ATOM_TREMONT:
742               /* Enable rep string instructions, unaligned load, unaligned
743                  copy, pminub and avoid SSE 4.2 on Tremont.  */
744               cpu_features->preferred[index_arch_Fast_Rep_String]
745                   |= (bit_arch_Fast_Rep_String
746                       | bit_arch_Fast_Unaligned_Load
747                       | bit_arch_Fast_Unaligned_Copy
748                       | bit_arch_Prefer_PMINUB_for_stringop
749                       | bit_arch_Slow_SSE4_2);
750               break;
751
752            /*
753             Default tuned Knights microarch.
754             case INTEL_KNIGHTS_MILL:
755            */
756
757            /*
758             Default tuned atom microarch.
759             case INTEL_ATOM_SIERRAFOREST:
760             case INTEL_ATOM_GRANDRIDGE:
761            */
762
763               /* Bigcore/Default Tuning.  */
764             default:
765             default_tuning:
766               /* Unknown family 0x06 processors.  Assume this is one
767                  of the Core i3/i5/i7 processors if AVX is available.  */
768               if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
769                 break;
770
771             enable_modern_features:
772               /* Rep string instructions, unaligned load, unaligned copy,
773                  and pminub are fast on Intel Core i3, i5 and i7.  */
774               cpu_features->preferred[index_arch_Fast_Rep_String]
775                   |= (bit_arch_Fast_Rep_String
776                       | bit_arch_Fast_Unaligned_Load
777                       | bit_arch_Fast_Unaligned_Copy
778                       | bit_arch_Prefer_PMINUB_for_stringop);
779               break;
780
781             case INTEL_BIGCORE_NEHALEM:
782             case INTEL_BIGCORE_WESTMERE:
783               /* Older CPUs prefer non-temporal stores at a lower threshold.  */
784               cpu_features->cachesize_non_temporal_divisor = 8;
785               goto enable_modern_features;
786
787               /* Older Bigcore microarch (smaller non-temporal store
788                  threshold).  */
789             case INTEL_BIGCORE_SANDYBRIDGE:
790             case INTEL_BIGCORE_IVYBRIDGE:
791             case INTEL_BIGCORE_HASWELL:
792             case INTEL_BIGCORE_BROADWELL:
793               cpu_features->cachesize_non_temporal_divisor = 8;
794               goto default_tuning;
795
796               /* Newer Bigcore microarch (larger non-temporal store
797                  threshold).  */
798             case INTEL_BIGCORE_SKYLAKE:
799             case INTEL_BIGCORE_KABYLAKE:
800             case INTEL_BIGCORE_COMETLAKE:
801             case INTEL_BIGCORE_SKYLAKE_AVX512:
802             case INTEL_BIGCORE_CANNONLAKE:
803             case INTEL_BIGCORE_ICELAKE:
804             case INTEL_BIGCORE_TIGERLAKE:
805             case INTEL_BIGCORE_ROCKETLAKE:
806             case INTEL_BIGCORE_RAPTORLAKE:
807             case INTEL_BIGCORE_METEORLAKE:
808             case INTEL_BIGCORE_LUNARLAKE:
809             case INTEL_BIGCORE_ARROWLAKE:
810             case INTEL_BIGCORE_SAPPHIRERAPIDS:
811             case INTEL_BIGCORE_EMERALDRAPIDS:
812             case INTEL_BIGCORE_GRANITERAPIDS:
813               cpu_features->cachesize_non_temporal_divisor = 2;
814               goto default_tuning;
815
816               /* Default tuned Mixed (bigcore + atom SOC). */
817             case INTEL_MIXED_LAKEFIELD:
818             case INTEL_MIXED_ALDERLAKE:
819               cpu_features->cachesize_non_temporal_divisor = 2;
820               goto default_tuning;
821             }
822
823           /* Disable TSX on some processors to avoid TSX on kernels that
824              weren't updated with the latest microcode package (which
825              disables the broken feature by default).  */
826           switch (microarch)
827             {
828             case INTEL_BIGCORE_SKYLAKE_AVX512:
829               /* Model 0x55 (Skylake-avx512) with stepping <= 5: disable TSX.  */
830               if (stepping <= 5)
831                 goto disable_tsx;
832               break;
833
834             case INTEL_BIGCORE_KABYLAKE:
835               /* NB: Although the errata documents that for model == 0x8e
836                  (Kabylake, Skylake client) only stepping 0xb or lower is
837                  impacted, the intention of the errata was to disable TSX on
838                  all client processors on all steppings.  Include stepping
839                  0xc, which is an Intel Core i7-8665U, a client mobile
840                  processor.  */
841               if (stepping > 0xc)
842                 break;
843               /* Fall through.  */
844             case INTEL_BIGCORE_SKYLAKE:
845                 /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
846                    processors listed in:
847
848 https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
849                  */
850             disable_tsx:
851                 CPU_FEATURE_UNSET (cpu_features, HLE);
852                 CPU_FEATURE_UNSET (cpu_features, RTM);
853                 CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
854                 break;
855
856             case INTEL_BIGCORE_HASWELL:
857                 /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
858                    TSX.  Haswell also includes other model numbers that have
859                    working TSX.  */
860                 if (model == 0x3f && stepping >= 4)
861                   break;
862
863                 CPU_FEATURE_UNSET (cpu_features, RTM);
864                 break;
865             }
866         }
867
868
869       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
870          if AVX512ER is available.  If AVX512ER isn't available, avoid
871          AVX512 so that ZMM usage doesn't lower the CPU frequency.  */
872       if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
873         cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
874           |= bit_arch_Prefer_No_VZEROUPPER;
875       else
876         {
877           /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
878              when ZMM load and store instructions are used.  */
879           if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
880             cpu_features->preferred[index_arch_Prefer_No_AVX512]
881               |= bit_arch_Prefer_No_AVX512;
882
883           /* Avoid RTM abort triggered by VZEROUPPER inside a
884              transactionally executing RTM region.  */
885           if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
886             cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
887               |= bit_arch_Prefer_No_VZEROUPPER;
888         }
889
890       /* Avoid short distance REP MOVSB on processors with FSRM.  */
891       if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
892         cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
893           |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
894     }
895   /* This spells out "AuthenticAMD" or "HygonGenuine".  */
896   else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
897            || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
898     {
899       unsigned int extended_model;
900
901       kind = arch_kind_amd;
902
903       get_common_indices (cpu_features, &family, &model, &extended_model,
904                           &stepping);
905
906       get_extended_indices (cpu_features);
907
908       update_active (cpu_features);
909
910       ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
911
912       if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
913         {
914           /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
915              FMA4 requires AVX, determine if FMA4 is usable here.  */
916           CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
917         }
918
919       if (family == 0x15)
920         {
921           /* "Excavator"   */
922           if (model >= 0x60 && model <= 0x7f)
923           {
924             cpu_features->preferred[index_arch_Fast_Unaligned_Load]
925               |= (bit_arch_Fast_Unaligned_Load
926                   | bit_arch_Fast_Copy_Backward);
927
928             /* Unaligned AVX loads are slower.  */
929             cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
930               &= ~bit_arch_AVX_Fast_Unaligned_Load;
931           }
932         }
933     }
934   /* This spells out "CentaurHauls" or " Shanghai ".  */
935   else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
936            || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
937     {
938       unsigned int extended_model;
939
940       kind = arch_kind_zhaoxin;
941
942       get_common_indices (cpu_features, &family, &model, &extended_model,
943                           &stepping);
944
945       get_extended_indices (cpu_features);
946
947       update_active (cpu_features);
948
949       model += extended_model;
950       if (family == 0x6)
951         {
952           if (model == 0xf || model == 0x19)
953             {
954               CPU_FEATURE_UNSET (cpu_features, AVX);
955               CPU_FEATURE_UNSET (cpu_features, AVX2);
956
957               cpu_features->preferred[index_arch_Slow_SSE4_2]
958                 |= bit_arch_Slow_SSE4_2;
959
960               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
961                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
962             }
963         }
964       else if (family == 0x7)
965         {
966           if (model == 0x1b)
967             {
968               CPU_FEATURE_UNSET (cpu_features, AVX);
969               CPU_FEATURE_UNSET (cpu_features, AVX2);
970
971               cpu_features->preferred[index_arch_Slow_SSE4_2]
972                 |= bit_arch_Slow_SSE4_2;
973
974               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
975                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
976             }
977           else if (model == 0x3b)
978             {
979               CPU_FEATURE_UNSET (cpu_features, AVX);
980               CPU_FEATURE_UNSET (cpu_features, AVX2);
981
982               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
983                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
984             }
985         }
986     }
987   else
988     {
989       kind = arch_kind_other;
990       get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
991       update_active (cpu_features);
992     }
993
994   /* Support i586 if CX8 is available.  */
995   if (CPU_FEATURES_CPU_P (cpu_features, CX8))
996     cpu_features->preferred[index_arch_I586] |= bit_arch_I586;
997
998   /* Support i686 if CMOV is available.  */
999   if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
1000     cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
1001
1002 #if !HAS_CPUID
1003 no_cpuid:
1004 #endif
1005
1006   cpu_features->basic.kind = kind;
1007   cpu_features->basic.family = family;
1008   cpu_features->basic.model = model;
1009   cpu_features->basic.stepping = stepping;
1010
1011   dl_init_cacheinfo (cpu_features);
1012
1013   TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
1014
1015 #ifdef __LP64__
1016   TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
1017                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
1018 #endif
1019
1020   bool disable_xsave_features = false;
1021
1022   if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
1023     {
1024       /* These features are usable only if OSXSAVE is usable.  */
1025       CPU_FEATURE_UNSET (cpu_features, XSAVE);
1026       CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
1027       CPU_FEATURE_UNSET (cpu_features, XSAVEC);
1028       CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
1029       CPU_FEATURE_UNSET (cpu_features, XFD);
1030
1031       disable_xsave_features = true;
1032     }
1033
1034   if (disable_xsave_features
1035       || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
1036           && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
1037     {
1038       /* Clear xsave_state_size if neither XSAVE nor XSAVEC is usable.  */
1039       cpu_features->xsave_state_size = 0;
1040
1041       CPU_FEATURE_UNSET (cpu_features, AVX);
1042       CPU_FEATURE_UNSET (cpu_features, AVX2);
1043       CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
1044       CPU_FEATURE_UNSET (cpu_features, FMA);
1045       CPU_FEATURE_UNSET (cpu_features, VAES);
1046       CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
1047       CPU_FEATURE_UNSET (cpu_features, XOP);
1048       CPU_FEATURE_UNSET (cpu_features, F16C);
1049       CPU_FEATURE_UNSET (cpu_features, AVX512F);
1050       CPU_FEATURE_UNSET (cpu_features, AVX512CD);
1051       CPU_FEATURE_UNSET (cpu_features, AVX512ER);
1052       CPU_FEATURE_UNSET (cpu_features, AVX512PF);
1053       CPU_FEATURE_UNSET (cpu_features, AVX512VL);
1054       CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
1055       CPU_FEATURE_UNSET (cpu_features, AVX512BW);
1056       CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
1057       CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
1058       CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
1059       CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
1060       CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
1061       CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
1062       CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
1063       CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
1064       CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
1065       CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
1066       CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
1067       CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
1068       CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
1069       CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
1070
1071       CPU_FEATURE_UNSET (cpu_features, FMA4);
1072     }
1073
1074 #ifdef __x86_64__
1075   GLRO(dl_hwcap) = HWCAP_X86_64;
1076   if (cpu_features->basic.kind == arch_kind_intel)
1077     {
1078       const char *platform = NULL;
1079
1080       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
1081         {
1082           if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
1083             {
1084               if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
1085                 platform = "xeon_phi";
1086             }
1087           else
1088             {
1089               if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
1090                   && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
1091                   && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
1092                 GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
1093             }
1094         }
1095
1096       if (platform == NULL
1097           && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
1098           && CPU_FEATURE_USABLE_P (cpu_features, FMA)
1099           && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
1100           && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
1101           && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
1102           && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
1103           && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
1104         platform = "haswell";
1105
1106       if (platform != NULL)
1107         GLRO(dl_platform) = platform;
1108     }
1109 #else
1110   GLRO(dl_hwcap) = 0;
1111   if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
1112     GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
1113
1114   if (CPU_FEATURES_ARCH_P (cpu_features, I686))
1115     GLRO(dl_platform) = "i686";
1116   else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
1117     GLRO(dl_platform) = "i586";
1118 #endif
1119
1120 #if CET_ENABLED
1121   TUNABLE_GET (x86_ibt, tunable_val_t *,
1122                TUNABLE_CALLBACK (set_x86_ibt));
1123   TUNABLE_GET (x86_shstk, tunable_val_t *,
1124                TUNABLE_CALLBACK (set_x86_shstk));
1125 #endif
1126
1127 #ifdef SHARED
1128   TUNABLE_GET (plt_rewrite, tunable_val_t *,
1129                TUNABLE_CALLBACK (set_plt_rewrite));
1130 #else
1131   /* NB: In libc.a, call init_cacheinfo.  */
1132   init_cacheinfo ();
1133 #endif
1134 }