x86: Update CPU feature detection [BZ #26149]
author H.J. Lu <hjl.tools@gmail.com>
Wed, 17 Jun 2020 13:34:46 +0000 (06:34 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Mon, 22 Jun 2020 20:09:33 +0000 (13:09 -0700)
1. Divide architecture features into the usable features and the preferred
features.  The usable features are for correctness and can be exported in
a stable ABI.  The preferred features are for performance and only for
glibc internal use.
2. Change struct cpu_features to

struct cpu_features
{
  struct cpu_features_basic basic;
  unsigned int *usable_p;
  struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
  unsigned int usable[USABLE_FEATURE_INDEX_MAX];
  unsigned int preferred[PREFERRED_FEATURE_INDEX_MAX];
  ...
};

and initialize usable_p to point to the usable array so that

struct cpu_features
{
  struct cpu_features_basic basic;
  unsigned int *usable_p;
  struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
};

can be exported via a stable ABI.  The cpuid and usable arrays can be
expanded with backward binary compatibility for both .o and .so files.
3. Add COMMON_CPUID_INDEX_7_ECX_1 for AVX512_BF16.
4. Detect ENQCMD, PKS, AVX512_VP2INTERSECT, MD_CLEAR, SERIALIZE, HYBRID,
TSXLDTRK, L1D_FLUSH, CORE_CAPABILITIES and AVX512_BF16.
5. Rename CAPABILITIES to ARCH_CAPABILITIES.
6. Check if AVX512_VP2INTERSECT, AVX512_BF16 and PKU are usable.
7. Update CPU feature detection test.

sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h
sysdeps/x86/cpu-features.c
sysdeps/x86/cpu-features.h
sysdeps/x86/cpu-tunables.c
sysdeps/x86/tst-get-cpu-features.c

index eeb2a48..8205ad1 100644 (file)
@@ -33,7 +33,7 @@
   case 21:                                                               \
     if (!__libc_enable_secure                                            \
        && memcmp (envline, "PREFER_MAP_32BIT_EXEC", 21) == 0)            \
-      GLRO(dl_x86_cpu_features).feature[index_arch_Prefer_MAP_32BIT_EXEC] \
+      GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC] \
        |= bit_arch_Prefer_MAP_32BIT_EXEC;                                \
     break;
 
index 5b4a30e..79bc0d7 100644 (file)
@@ -90,11 +90,18 @@ get_common_indices (struct cpu_features *cpu_features,
     }
 
   if (cpu_features->basic.max_cpuid >= 7)
-    __cpuid_count (7, 0,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
-                  cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+    {
+      __cpuid_count (7, 0,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+      __cpuid_count (7, 1,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].eax,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ebx,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ecx,
+                    cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].edx);
+    }
 
   if (cpu_features->basic.max_cpuid >= 0xd)
     __cpuid_count (0xd, 1,
@@ -116,39 +123,39 @@ get_common_indices (struct cpu_features *cpu_features,
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
-             cpu_features->feature[index_arch_AVX_Usable]
+             cpu_features->usable[index_arch_AVX_Usable]
                |= bit_arch_AVX_Usable;
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
              {
-               cpu_features->feature[index_arch_AVX2_Usable]
+               cpu_features->usable[index_arch_AVX2_Usable]
                  |= bit_arch_AVX2_Usable;
 
                /* Unaligned load with 256-bit AVX registers are faster on
                   Intel/AMD processors with AVX2.  */
-               cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                  |= bit_arch_AVX_Fast_Unaligned_Load;
              }
              /* Determine if FMA is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, FMA))
-               cpu_features->feature[index_arch_FMA_Usable]
+               cpu_features->usable[index_arch_FMA_Usable]
                  |= bit_arch_FMA_Usable;
              /* Determine if VAES is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, VAES))
-               cpu_features->feature[index_arch_VAES_Usable]
+               cpu_features->usable[index_arch_VAES_Usable]
                  |= bit_arch_VAES_Usable;
              /* Determine if VPCLMULQDQ is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ))
-               cpu_features->feature[index_arch_VPCLMULQDQ_Usable]
+               cpu_features->usable[index_arch_VPCLMULQDQ_Usable]
                  |= bit_arch_VPCLMULQDQ_Usable;
              /* Determine if XOP is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XOP))
-               cpu_features->feature[index_arch_XOP_Usable]
+               cpu_features->usable[index_arch_XOP_Usable]
                  |= bit_arch_XOP_Usable;
              /* Determine if F16C is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, F16C))
-               cpu_features->feature[index_arch_F16C_Usable]
+               cpu_features->usable[index_arch_F16C_Usable]
                  |= bit_arch_F16C_Usable;
            }
 
@@ -161,64 +168,73 @@ get_common_indices (struct cpu_features *cpu_features,
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
-                 cpu_features->feature[index_arch_AVX512F_Usable]
+                 cpu_features->usable[index_arch_AVX512F_Usable]
                    |= bit_arch_AVX512F_Usable;
                  /* Determine if AVX512CD is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
-                   cpu_features->feature[index_arch_AVX512CD_Usable]
+                   cpu_features->usable[index_arch_AVX512CD_Usable]
                      |= bit_arch_AVX512CD_Usable;
                  /* Determine if AVX512ER is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
-                   cpu_features->feature[index_arch_AVX512ER_Usable]
+                   cpu_features->usable[index_arch_AVX512ER_Usable]
                      |= bit_arch_AVX512ER_Usable;
                  /* Determine if AVX512PF is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
-                   cpu_features->feature[index_arch_AVX512PF_Usable]
+                   cpu_features->usable[index_arch_AVX512PF_Usable]
                      |= bit_arch_AVX512PF_Usable;
                  /* Determine if AVX512VL is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
-                   cpu_features->feature[index_arch_AVX512VL_Usable]
+                   cpu_features->usable[index_arch_AVX512VL_Usable]
                      |= bit_arch_AVX512VL_Usable;
                  /* Determine if AVX512DQ is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
-                   cpu_features->feature[index_arch_AVX512DQ_Usable]
+                   cpu_features->usable[index_arch_AVX512DQ_Usable]
                      |= bit_arch_AVX512DQ_Usable;
                  /* Determine if AVX512BW is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW))
-                   cpu_features->feature[index_arch_AVX512BW_Usable]
+                   cpu_features->usable[index_arch_AVX512BW_Usable]
                      |= bit_arch_AVX512BW_Usable;
                  /* Determine if AVX512_4FMAPS is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS))
-                   cpu_features->feature[index_arch_AVX512_4FMAPS_Usable]
+                   cpu_features->usable[index_arch_AVX512_4FMAPS_Usable]
                      |= bit_arch_AVX512_4FMAPS_Usable;
                  /* Determine if AVX512_4VNNIW is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW))
-                   cpu_features->feature[index_arch_AVX512_4VNNIW_Usable]
+                   cpu_features->usable[index_arch_AVX512_4VNNIW_Usable]
                      |= bit_arch_AVX512_4VNNIW_Usable;
                  /* Determine if AVX512_BITALG is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG))
-                   cpu_features->feature[index_arch_AVX512_BITALG_Usable]
+                   cpu_features->usable[index_arch_AVX512_BITALG_Usable]
                      |= bit_arch_AVX512_BITALG_Usable;
                  /* Determine if AVX512_IFMA is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA))
-                   cpu_features->feature[index_arch_AVX512_IFMA_Usable]
+                   cpu_features->usable[index_arch_AVX512_IFMA_Usable]
                      |= bit_arch_AVX512_IFMA_Usable;
                  /* Determine if AVX512_VBMI is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI))
-                   cpu_features->feature[index_arch_AVX512_VBMI_Usable]
+                   cpu_features->usable[index_arch_AVX512_VBMI_Usable]
                      |= bit_arch_AVX512_VBMI_Usable;
                  /* Determine if AVX512_VBMI2 is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2))
-                   cpu_features->feature[index_arch_AVX512_VBMI2_Usable]
+                   cpu_features->usable[index_arch_AVX512_VBMI2_Usable]
                      |= bit_arch_AVX512_VBMI2_Usable;
                  /* Determine if is AVX512_VNNI usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI))
-                   cpu_features->feature[index_arch_AVX512_VNNI_Usable]
+                   cpu_features->usable[index_arch_AVX512_VNNI_Usable]
                      |= bit_arch_AVX512_VNNI_Usable;
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
-                   cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
+                   cpu_features->usable[index_arch_AVX512_VPOPCNTDQ_Usable]
                      |= bit_arch_AVX512_VPOPCNTDQ_Usable;
+                 /* Determine if AVX512_VP2INTERSECT is usable.  */
+                 if (CPU_FEATURES_CPU_P (cpu_features,
+                                         AVX512_VP2INTERSECT))
+                   cpu_features->usable[index_arch_AVX512_VP2INTERSECT_Usable]
+                     |= bit_arch_AVX512_VP2INTERSECT_Usable;
+                 /* Determine if AVX512_BF16 is usable.  */
+                 if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BF16))
+                   cpu_features->usable[index_arch_AVX512_BF16_Usable]
+                     |= bit_arch_AVX512_BF16_Usable;
                }
            }
        }
@@ -284,13 +300,18 @@ get_common_indices (struct cpu_features *cpu_features,
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
-                     cpu_features->feature[index_arch_XSAVEC_Usable]
+                     cpu_features->usable[index_arch_XSAVEC_Usable]
                        |= bit_arch_XSAVEC_Usable;
                    }
                }
            }
        }
     }
+
+  /* Determine if PKU is usable.  */
+  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
+    cpu_features->usable[index_arch_PKU_Usable]
+      |= bit_arch_PKU_Usable;
 }
 
 _Static_assert (((index_arch_Fast_Unaligned_Load
@@ -314,6 +335,8 @@ init_cpu_features (struct cpu_features *cpu_features)
   unsigned int stepping = 0;
   enum cpu_features_kind kind;
 
+  cpu_features->usable_p = cpu_features->usable;
+
 #if !HAS_CPUID
   if (__get_cpuid_max (0, 0) == 0)
     {
@@ -344,7 +367,7 @@ init_cpu_features (struct cpu_features *cpu_features)
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
-             cpu_features->feature[index_arch_Slow_BSF]
+             cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;
 
@@ -371,7 +394,7 @@ init_cpu_features (struct cpu_features *cpu_features)
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
-             cpu_features->feature[index_arch_Fast_Unaligned_Load]
+             cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
@@ -383,7 +406,7 @@ init_cpu_features (struct cpu_features *cpu_features)
            case 0x9c:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
-             cpu_features->feature[index_arch_Fast_Rep_String]
+             cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
@@ -407,7 +430,7 @@ init_cpu_features (struct cpu_features *cpu_features)
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
-             cpu_features->feature[index_arch_Fast_Rep_String]
+             cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
@@ -442,10 +465,10 @@ init_cpu_features (struct cpu_features *cpu_features)
          if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
       if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
-       cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
+       cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
       else
-       cpu_features->feature[index_arch_Prefer_No_AVX512]
+       cpu_features->preferred[index_arch_Prefer_No_AVX512]
          |= bit_arch_Prefer_No_AVX512;
     }
   /* This spells out "AuthenticAMD" or "HygonGenuine".  */
@@ -468,7 +491,7 @@ init_cpu_features (struct cpu_features *cpu_features)
          /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
-           cpu_features->feature[index_arch_FMA4_Usable]
+           cpu_features->usable[index_arch_FMA4_Usable]
              |= bit_arch_FMA4_Usable;
        }
 
@@ -477,13 +500,13 @@ init_cpu_features (struct cpu_features *cpu_features)
          /* "Excavator"   */
          if (model >= 0x60 && model <= 0x7f)
          {
-           cpu_features->feature[index_arch_Fast_Unaligned_Load]
+           cpu_features->preferred[index_arch_Fast_Unaligned_Load]
              |= (bit_arch_Fast_Unaligned_Load
                  | bit_arch_Fast_Copy_Backward);
 
            /* Unaligned AVX loads are slower.*/
-           cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-                 &= ~bit_arch_AVX_Fast_Unaligned_Load;
+           cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+             &= ~bit_arch_AVX_Fast_Unaligned_Load;
          }
        }
     }
@@ -505,41 +528,38 @@ init_cpu_features (struct cpu_features *cpu_features)
         {
           if (model == 0xf || model == 0x19)
             {
-              cpu_features->feature[index_arch_AVX_Usable]
-                &= (~bit_arch_AVX_Usable
-                & ~bit_arch_AVX2_Usable);
+              cpu_features->usable[index_arch_AVX_Usable]
+                &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
 
-              cpu_features->feature[index_arch_Slow_SSE4_2]
-                |= (bit_arch_Slow_SSE4_2);
+              cpu_features->preferred[index_arch_Slow_SSE4_2]
+                |= bit_arch_Slow_SSE4_2;
 
-              cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+             cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+               &= ~bit_arch_AVX_Fast_Unaligned_Load;
             }
         }
       else if (family == 0x7)
         {
-          if (model == 0x1b)
-            {
-              cpu_features->feature[index_arch_AVX_Usable]
-                &= (~bit_arch_AVX_Usable
-                & ~bit_arch_AVX2_Usable);
+         if (model == 0x1b)
+           {
+             cpu_features->usable[index_arch_AVX_Usable]
+               &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
 
-              cpu_features->feature[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+             cpu_features->preferred[index_arch_Slow_SSE4_2]
+               |= bit_arch_Slow_SSE4_2;
+
+             cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+               &= ~bit_arch_AVX_Fast_Unaligned_Load;
+           }
+         else if (model == 0x3b)
+           {
+             cpu_features->usable[index_arch_AVX_Usable]
+               &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
 
-              cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-           }
-         else if (model == 0x3b)
-           {
-             cpu_features->feature[index_arch_AVX_Usable]
-               &= (~bit_arch_AVX_Usable
-               & ~bit_arch_AVX2_Usable);
-
-               cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
-               &= ~bit_arch_AVX_Fast_Unaligned_Load;
-           }
-       }
+             cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+               &= ~bit_arch_AVX_Fast_Unaligned_Load;
+           }
+       }
     }
   else
     {
@@ -549,11 +569,11 @@ init_cpu_features (struct cpu_features *cpu_features)
 
   /* Support i586 if CX8 is available.  */
   if (CPU_FEATURES_CPU_P (cpu_features, CX8))
-    cpu_features->feature[index_arch_I586] |= bit_arch_I586;
+    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;
 
   /* Support i686 if CMOV is available.  */
   if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
-    cpu_features->feature[index_arch_I686] |= bit_arch_I686;
+    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
 
 #if !HAS_CPUID
 no_cpuid:
index 722bcdc..574f055 100644 (file)
 
 enum
 {
-  /* The integer bit array index for the first set of internal feature
+  /* The integer bit array index for the first set of usable feature
      bits.  */
-  FEATURE_INDEX_1 = 0,
-  FEATURE_INDEX_2,
+  USABLE_FEATURE_INDEX_1 = 0,
   /* The current maximum size of the feature integer bit array.  */
-  FEATURE_INDEX_MAX
+  USABLE_FEATURE_INDEX_MAX
+};
+
+enum
+{
+  /* The integer bit array index for the first set of preferred feature
+     bits.  */
+  PREFERRED_FEATURE_INDEX_1 = 0,
+  /* The current maximum size of the feature integer bit array.  */
+  PREFERRED_FEATURE_INDEX_MAX
 };
 
 enum
@@ -36,6 +44,7 @@ enum
   COMMON_CPUID_INDEX_D_ECX_1,
   COMMON_CPUID_INDEX_80000007,
   COMMON_CPUID_INDEX_80000008,
+  COMMON_CPUID_INDEX_7_ECX_1,
   /* Keep the following line at the end.  */
   COMMON_CPUID_INDEX_MAX
 };
@@ -68,9 +77,11 @@ struct cpu_features_basic
 
 struct cpu_features
 {
-  struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
-  unsigned int feature[FEATURE_INDEX_MAX];
   struct cpu_features_basic basic;
+  unsigned int *usable_p;
+  struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
+  unsigned int usable[USABLE_FEATURE_INDEX_MAX];
+  unsigned int preferred[PREFERRED_FEATURE_INDEX_MAX];
   /* The state size for XSAVEC or XSAVE.  The type must be unsigned long
      int so that we use
 
@@ -102,7 +113,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define CPU_FEATURES_CPU_P(ptr, name) \
   ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
 # define CPU_FEATURES_ARCH_P(ptr, name) \
-  ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
+  ((ptr->feature_##name[index_arch_##name] & (bit_arch_##name)) != 0)
 
 /* HAS_CPU_FEATURE evaluates to true if CPU supports the feature.  */
 #define HAS_CPU_FEATURE(name) \
@@ -112,13 +123,12 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_ARCH_FEATURE(name) \
   CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
 /* CPU_FEATURE_USABLE evaluates to true if the feature is usable.  */
-#define CPU_FEATURE_USABLE(name)                               \
-  ((need_arch_feature_##name && HAS_ARCH_FEATURE (name##_Usable))      \
-   || (!need_arch_feature_##name && HAS_CPU_FEATURE(name)))
+#define CPU_FEATURE_USABLE(name) \
+  HAS_ARCH_FEATURE (name##_Usable)
 
 /* Architecture features.  */
 
-/* FEATURE_INDEX_1.  */
+/* USABLE_FEATURE_INDEX_1.  */
 #define bit_arch_AVX_Usable                    (1u << 0)
 #define bit_arch_AVX2_Usable                   (1u << 1)
 #define bit_arch_AVX512F_Usable                        (1u << 2)
@@ -143,237 +153,65 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define bit_arch_XOP_Usable                    (1u << 21)
 #define bit_arch_XSAVEC_Usable                 (1u << 22)
 #define bit_arch_F16C_Usable                   (1u << 23)
-
-#define index_arch_AVX_Usable                  FEATURE_INDEX_1
-#define index_arch_AVX2_Usable                 FEATURE_INDEX_1
-#define index_arch_AVX512F_Usable              FEATURE_INDEX_1
-#define index_arch_AVX512CD_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512ER_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512PF_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512VL_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512BW_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512DQ_Usable             FEATURE_INDEX_1
-#define index_arch_AVX512_4FMAPS_Usable                FEATURE_INDEX_1
-#define index_arch_AVX512_4VNNIW_Usable                FEATURE_INDEX_1
-#define index_arch_AVX512_BITALG_Usable                FEATURE_INDEX_1
-#define index_arch_AVX512_IFMA_Usable          FEATURE_INDEX_1
-#define index_arch_AVX512_VBMI_Usable          FEATURE_INDEX_1
-#define index_arch_AVX512_VBMI2_Usable         FEATURE_INDEX_1
-#define index_arch_AVX512_VNNI_Usable          FEATURE_INDEX_1
-#define index_arch_AVX512_VPOPCNTDQ_Usable     FEATURE_INDEX_1
-#define index_arch_FMA_Usable                  FEATURE_INDEX_1
-#define index_arch_FMA4_Usable                 FEATURE_INDEX_1
-#define index_arch_VAES_Usable                 FEATURE_INDEX_1
-#define index_arch_VPCLMULQDQ_Usable           FEATURE_INDEX_1
-#define index_arch_XOP_Usable                  FEATURE_INDEX_1
-#define index_arch_XSAVEC_Usable               FEATURE_INDEX_1
-#define index_arch_F16C_Usable                 FEATURE_INDEX_1
-
-/* Unused.  Compiler will optimize them out.  */
-#define bit_arch_SSE3_Usable                   (1u << 0)
-#define bit_arch_PCLMULQDQ_Usable              (1u << 0)
-#define bit_arch_SSSE3_Usable                  (1u << 0)
-#define bit_arch_CMPXCHG16B_Usable             (1u << 0)
-#define bit_arch_SSE4_1_Usable                 (1u << 0)
-#define bit_arch_SSE4_2_Usable                 (1u << 0)
-#define bit_arch_MOVBE_Usable                  (1u << 0)
-#define bit_arch_POPCNT_Usable                 (1u << 0)
-#define bit_arch_AES_Usable                    (1u << 0)
-#define bit_arch_XSAVE_Usable                  (1u << 0)
-#define bit_arch_OSXSAVE_Usable                        (1u << 0)
-#define bit_arch_RDRAND_Usable                 (1u << 0)
-#define bit_arch_FPU_Usable                    (1u << 0)
-#define bit_arch_TSC_Usable                    (1u << 0)
-#define bit_arch_MSR_Usable                    (1u << 0)
-#define bit_arch_CX8_Usable                    (1u << 0)
-#define bit_arch_SEP_Usable                    (1u << 0)
-#define bit_arch_CMOV_Usable                   (1u << 0)
-#define bit_arch_CLFSH_Usable                  (1u << 0)
-#define bit_arch_MMX_Usable                    (1u << 0)
-#define bit_arch_FXSR_Usable                   (1u << 0)
-#define bit_arch_SSE_Usable                    (1u << 0)
-#define bit_arch_SSE2_Usable                   (1u << 0)
-#define bit_arch_FSGSBASE_Usable               (1u << 0)
-#define bit_arch_BMI1_Usable                   (1u << 0)
-#define bit_arch_HLE_Usable                    (1u << 0)
-#define bit_arch_BMI2_Usable                   (1u << 0)
-#define bit_arch_ERMS_Usable                   (1u << 0)
-#define bit_arch_RTM_Usable                    (1u << 0)
-#define bit_arch_RDSEED_Usable                 (1u << 0)
-#define bit_arch_ADX_Usable                    (1u << 0)
-#define bit_arch_CLFLUSHOPT_Usable             (1u << 0)
-#define bit_arch_CLWB_Usable                   (1u << 0)
-#define bit_arch_SHA_Usable                    (1u << 0)
-#define bit_arch_PREFETCHWT1_Usable            (1u << 0)
-#define bit_arch_GFNI_Usable                   (1u << 0)
-#define bit_arch_RDPID_Usable                  (1u << 0)
-#define bit_arch_CLDEMOTE_Usable               (1u << 0)
-#define bit_arch_MOVDIRI_Usable                        (1u << 0)
-#define bit_arch_MOVDIR64B_Usable              (1u << 0)
-#define bit_arch_FSRM_Usable                   (1u << 0)
-#define bit_arch_LAHF64_SAHF64_Usable          (1u << 0)
-#define bit_arch_SVM_Usable                    (1u << 0)
-#define bit_arch_LZCNT_Usable                  (1u << 0)
-#define bit_arch_SSE4A_Usable                  (1u << 0)
-#define bit_arch_PREFETCHW_Usable              (1u << 0)
-#define bit_arch_TBM_Usable                    (1u << 0)
-#define bit_arch_SYSCALL_SYSRET_Usable         (1u << 0)
-#define bit_arch_RDTSCP_Usable                 (1u << 0)
-#define bit_arch_XSAVEOPT_Usable               (1u << 0)
-#define bit_arch_XGETBV_ECX_1_Usable           (1u << 0)
-#define bit_arch_XSAVES_Usable                 (1u << 0)
-#define bit_arch_INVARIANT_TSC_Usable          (1u << 0)
-#define bit_arch_WBNOINVD_Usable               (1u << 0)
-
-/* Unused.  Compiler will optimize them out.  */
-#define index_arch_SSE3_Usable                 FEATURE_INDEX_1
-#define index_arch_PCLMULQDQ_Usable            FEATURE_INDEX_1
-#define index_arch_SSSE3_Usable                        FEATURE_INDEX_1
-#define index_arch_CMPXCHG16B_Usable           FEATURE_INDEX_1
-#define index_arch_SSE4_1_Usable               FEATURE_INDEX_1
-#define index_arch_SSE4_2_Usable               FEATURE_INDEX_1
-#define index_arch_MOVBE_Usable                        FEATURE_INDEX_1
-#define index_arch_POPCNT_Usable               FEATURE_INDEX_1
-#define index_arch_AES_Usable                  FEATURE_INDEX_1
-#define index_arch_XSAVE_Usable                        FEATURE_INDEX_1
-#define index_arch_OSXSAVE_Usable              FEATURE_INDEX_1
-#define index_arch_RDRAND_Usable               FEATURE_INDEX_1
-#define index_arch_FPU_Usable                  FEATURE_INDEX_1
-#define index_arch_TSC_Usable                  FEATURE_INDEX_1
-#define index_arch_MSR_Usable                  FEATURE_INDEX_1
-#define index_arch_CX8_Usable                  FEATURE_INDEX_1
-#define index_arch_SEP_Usable                  FEATURE_INDEX_1
-#define index_arch_CMOV_Usable                 FEATURE_INDEX_1
-#define index_arch_CLFSH_Usable                        FEATURE_INDEX_1
-#define index_arch_MMX_Usable                  FEATURE_INDEX_1
-#define index_arch_FXSR_Usable                 FEATURE_INDEX_1
-#define index_arch_SSE_Usable                  FEATURE_INDEX_1
-#define index_arch_SSE2_Usable                 FEATURE_INDEX_1
-#define index_arch_FSGSBASE_Usable             FEATURE_INDEX_1
-#define index_arch_BMI1_Usable                 FEATURE_INDEX_1
-#define index_arch_HLE_Usable                  FEATURE_INDEX_1
-#define index_arch_BMI2_Usable                 FEATURE_INDEX_1
-#define index_arch_ERMS_Usable                 FEATURE_INDEX_1
-#define index_arch_RTM_Usable                  FEATURE_INDEX_1
-#define index_arch_RDSEED_Usable               FEATURE_INDEX_1
-#define index_arch_ADX_Usable                  FEATURE_INDEX_1
-#define index_arch_CLFLUSHOPT_Usable           FEATURE_INDEX_1
-#define index_arch_CLWB_Usable                 FEATURE_INDEX_1
-#define index_arch_SHA_Usable                  FEATURE_INDEX_1
-#define index_arch_PREFETCHWT1_Usable          FEATURE_INDEX_1
-#define index_arch_GFNI_Usable                 FEATURE_INDEX_1
-#define index_arch_RDPID_Usable                        FEATURE_INDEX_1
-#define index_arch_CLDEMOTE_Usable             FEATURE_INDEX_1
-#define index_arch_MOVDIRI_Usable              FEATURE_INDEX_1
-#define index_arch_MOVDIR64B_Usable            FEATURE_INDEX_1
-#define index_arch_FSRM_Usable                 FEATURE_INDEX_1
-#define index_arch_LAHF64_SAHF64_Usable                FEATURE_INDEX_1
-#define index_arch_LZCNT_Usable                        FEATURE_INDEX_1
-#define index_arch_SSE4A_Usable                        FEATURE_INDEX_1
-#define index_arch_PREFETCHW_Usable            FEATURE_INDEX_1
-#define index_arch_TBM_Usable                  FEATURE_INDEX_1
-#define index_arch_SYSCALL_SYSRET_Usable       FEATURE_INDEX_1
-#define index_arch_RDTSCP_Usable               FEATURE_INDEX_1
-#define index_arch_XSAVEOPT_Usable             FEATURE_INDEX_1
-#define index_arch_XGETBV_ECX_1_Usable         FEATURE_INDEX_1
-#define index_arch_XSAVES_Usable               FEATURE_INDEX_1
-#define index_arch_INVARIANT_TSC_Usable                FEATURE_INDEX_1
-#define index_arch_WBNOINVD_Usable             FEATURE_INDEX_1
-
-/* COMMON_CPUID_INDEX_1.  */
-
-/* ECX.  */
-#define        need_arch_feature_SSE3                  0
-#define        need_arch_feature_PCLMULQDQ             0
-#define need_arch_feature_SSSE3                        0
-#define need_arch_feature_FMA                  1
-#define need_arch_feature_CMPXCHG16B           0
-#define need_arch_feature_SSE4_1               0
-#define need_arch_feature_SSE4_2               0
-#define need_arch_feature_MOVBE                        0
-#define need_arch_feature_POPCNT               0
-#define need_arch_feature_AES                  0
-#define need_arch_feature_XSAVE                        0
-#define need_arch_feature_OSXSAVE              0
-#define need_arch_feature_AVX                  1
-#define need_arch_feature_F16C                 1
-#define need_arch_feature_RDRAND               0
-
-/* EDX.  */
-#define need_arch_feature_FPU                  0
-#define need_arch_feature_TSC                  0
-#define need_arch_feature_MSR                  0
-#define need_arch_feature_CX8                  0
-#define need_arch_feature_SEP                  0
-#define need_arch_feature_CMOV                 0
-#define need_arch_feature_CLFSH                        0
-#define need_arch_feature_MMX                  0
-#define need_arch_feature_FXSR                 0
-#define need_arch_feature_SSE                  0
-#define need_arch_feature_SSE2                 0
-
-/* COMMON_CPUID_INDEX_7.  */
-
-/* EBX.  */
-#define need_arch_feature_FSGSBASE             0
-#define need_arch_feature_BMI1                 0
-#define need_arch_feature_HLE                  0
-#define need_arch_feature_AVX2                 1
-#define need_arch_feature_BMI2                 0
-#define need_arch_feature_ERMS                 0
-#define need_arch_feature_RTM                  0
-#define need_arch_feature_AVX512F              1
-#define need_arch_feature_AVX512DQ             1
-#define need_arch_feature_RDSEED               0
-#define need_arch_feature_ADX                  0
-#define need_arch_feature_AVX512_IFMA          1
-#define need_arch_feature_CLFLUSHOPT           0
-#define need_arch_feature_CLWB                 0
-#define need_arch_feature_AVX512PF             1
-#define need_arch_feature_AVX512ER             1
-#define need_arch_feature_AVX512CD             1
-#define need_arch_feature_SHA                  0
-#define need_arch_feature_AVX512BW             1
-#define need_arch_feature_AVX512VL             1
-
-/* ECX.  */
-#define need_arch_feature_PREFETCHWT1          0
-#define need_arch_feature_AVX512_VBMI          1
-#define need_arch_feature_AVX512_VBMI2         1
-#define need_arch_feature_GFNI                 0
-#define need_arch_feature_VAES                 1
-#define need_arch_feature_VPCLMULQDQ           1
-#define need_arch_feature_AVX512_VNNI          1
-#define need_arch_feature_AVX512_BITALG                1
-#define need_arch_feature_AVX512_VPOPCNTDQ     1
-#define need_arch_feature_RDPID                        0
-#define need_arch_feature_CLDEMOTE             0
-#define need_arch_feature_MOVDIRI              0
-#define need_arch_feature_MOVDIR64B            0
-
-/* EDX.  */
-#define need_arch_feature_AVX512_4VNNIW                1
-#define need_arch_feature_AVX512_4FMAPS                1
-#define need_arch_feature_FSRM                 0
-
-/* COMMON_CPUID_INDEX_80000001.  */
-
-/* ECX.  */
-#define need_arch_feature_LAHF64_SAHF64                0
-#define need_arch_feature_LZCNT                        0
-#define need_arch_feature_SSE4A                        0
-#define need_arch_feature_PREFETCHW            0
-#define need_arch_feature_XOP                  1
-#define need_arch_feature_FMA4                 1
-#define need_arch_feature_TBM                  0
-#define need_arch_feature_SYSCALL_SYSRET       0
-#define need_arch_feature_RDTSCP               0
-#define need_arch_feature_XSAVEOPT             0
-#define need_arch_feature_XSAVEC               1
-#define need_arch_feature_XGETBV_ECX_1         0
-#define need_arch_feature_XSAVES               0
-#define need_arch_feature_INVARIANT_TSC                0
-#define need_arch_feature_WBNOINVD             0
+#define bit_arch_AVX512_VP2INTERSECT_Usable    (1u << 24)
+#define bit_arch_AVX512_BF16_Usable            (1u << 25)
+#define bit_arch_PKU_Usable                    (1u << 26)
+
+#define index_arch_AVX_Usable                  USABLE_FEATURE_INDEX_1
+#define index_arch_AVX2_Usable                 USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512F_Usable              USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512CD_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512ER_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512PF_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512VL_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512BW_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512DQ_Usable             USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_4FMAPS_Usable                USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_4VNNIW_Usable                USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_BITALG_Usable                USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_IFMA_Usable          USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI_Usable          USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI2_Usable         USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_VNNI_Usable          USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_VPOPCNTDQ_Usable     USABLE_FEATURE_INDEX_1
+#define index_arch_FMA_Usable                  USABLE_FEATURE_INDEX_1
+#define index_arch_FMA4_Usable                 USABLE_FEATURE_INDEX_1
+#define index_arch_VAES_Usable                 USABLE_FEATURE_INDEX_1
+#define index_arch_VPCLMULQDQ_Usable           USABLE_FEATURE_INDEX_1
+#define index_arch_XOP_Usable                  USABLE_FEATURE_INDEX_1
+#define index_arch_XSAVEC_Usable               USABLE_FEATURE_INDEX_1
+#define index_arch_F16C_Usable                 USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_VP2INTERSECT_Usable  USABLE_FEATURE_INDEX_1
+#define index_arch_AVX512_BF16_Usable          USABLE_FEATURE_INDEX_1
+#define index_arch_PKU_Usable                  USABLE_FEATURE_INDEX_1
+
+#define feature_AVX_Usable                     usable
+#define feature_AVX2_Usable                    usable
+#define feature_AVX512F_Usable                 usable
+#define feature_AVX512CD_Usable                        usable
+#define feature_AVX512ER_Usable                        usable
+#define feature_AVX512PF_Usable                        usable
+#define feature_AVX512VL_Usable                        usable
+#define feature_AVX512BW_Usable                        usable
+#define feature_AVX512DQ_Usable                        usable
+#define feature_AVX512_4FMAPS_Usable           usable
+#define feature_AVX512_4VNNIW_Usable           usable
+#define feature_AVX512_BITALG_Usable           usable
+#define feature_AVX512_IFMA_Usable             usable
+#define feature_AVX512_VBMI_Usable             usable
+#define feature_AVX512_VBMI2_Usable            usable
+#define feature_AVX512_VNNI_Usable             usable
+#define feature_AVX512_VPOPCNTDQ_Usable                usable
+#define feature_FMA_Usable                     usable
+#define feature_FMA4_Usable                    usable
+#define feature_VAES_Usable                    usable
+#define feature_VPCLMULQDQ_Usable              usable
+#define feature_XOP_Usable                     usable
+#define feature_XSAVEC_Usable                  usable
+#define feature_F16C_Usable                    usable
+#define feature_AVX512_VP2INTERSECT_Usable     usable
+#define feature_AVX512_BF16_Usable             usable
+#define feature_PKU_Usable                     usable
 
 /* CPU features.  */
 
@@ -494,17 +332,26 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define bit_cpu_CLDEMOTE       (1u << 25)
 #define bit_cpu_MOVDIRI                (1u << 27)
 #define bit_cpu_MOVDIR64B      (1u << 28)
+#define bit_cpu_ENQCMD         (1u << 29)
 #define bit_cpu_SGX_LC         (1u << 30)
+#define bit_cpu_PKS            (1u << 31)
 
 /* EDX.  */
 #define bit_cpu_AVX512_4VNNIW  (1u << 2)
 #define bit_cpu_AVX512_4FMAPS  (1u << 3)
 #define bit_cpu_FSRM           (1u << 4)
+#define bit_cpu_AVX512_VP2INTERSECT (1u << 8)
+#define bit_cpu_MD_CLEAR       (1u << 10)
+#define bit_cpu_SERIALIZE      (1u << 14)
+#define bit_cpu_HYBRID         (1u << 15)
+#define bit_cpu_TSXLDTRK       (1u << 16)
 #define bit_cpu_PCONFIG                (1u << 18)
 #define bit_cpu_IBT            (1u << 20)
 #define bit_cpu_IBRS_IBPB      (1u << 26)
 #define bit_cpu_STIBP          (1u << 27)
-#define bit_cpu_CAPABILITIES   (1u << 29)
+#define bit_cpu_L1D_FLUSH      (1u << 28)
+#define bit_cpu_ARCH_CAPABILITIES (1u << 29)
+#define bit_cpu_CORE_CAPABILITIES (1u << 30)
 #define bit_cpu_SSBD           (1u << 31)
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -545,6 +392,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define bit_cpu_WBNOINVD       (1u << 9)
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define bit_cpu_AVX512_BF16    (1u << 5)
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -662,17 +514,26 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define index_cpu_CLDEMOTE     COMMON_CPUID_INDEX_7
 #define index_cpu_MOVDIRI      COMMON_CPUID_INDEX_7
 #define index_cpu_MOVDIR64B    COMMON_CPUID_INDEX_7
+#define index_cpu_ENQCMD       COMMON_CPUID_INDEX_7
 #define index_cpu_SGX_LC       COMMON_CPUID_INDEX_7
+#define index_cpu_PKS          COMMON_CPUID_INDEX_7
 
 /* EDX.  */
 #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
 #define index_cpu_AVX512_4FMAPS        COMMON_CPUID_INDEX_7
 #define index_cpu_FSRM         COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7
+#define index_cpu_MD_CLEAR     COMMON_CPUID_INDEX_7
+#define index_cpu_SERIALIZE    COMMON_CPUID_INDEX_7
+#define index_cpu_HYBRID       COMMON_CPUID_INDEX_7
+#define index_cpu_TSXLDTRK     COMMON_CPUID_INDEX_7
 #define index_cpu_PCONFIG      COMMON_CPUID_INDEX_7
 #define index_cpu_IBT          COMMON_CPUID_INDEX_7
 #define index_cpu_IBRS_IBPB    COMMON_CPUID_INDEX_7
 #define index_cpu_STIBP                COMMON_CPUID_INDEX_7
-#define index_cpu_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_L1D_FLUSH    COMMON_CPUID_INDEX_7
+#define index_cpu_ARCH_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_CORE_CAPABILITIES COMMON_CPUID_INDEX_7
 #define index_cpu_SSBD         COMMON_CPUID_INDEX_7
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -713,6 +574,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define index_cpu_WBNOINVD     COMMON_CPUID_INDEX_80000008
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define index_cpu_AVX512_BF16  COMMON_CPUID_INDEX_7_ECX_1
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -830,17 +696,26 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define reg_CLDEMOTE           ecx
 #define reg_MOVDIRI            ecx
 #define reg_MOVDIR64B          ecx
+#define reg_ENQCMD             ecx
 #define reg_SGX_LC             ecx
+#define reg_PKS                        ecx
 
 /* EDX.  */
 #define reg_AVX512_4VNNIW      edx
 #define reg_AVX512_4FMAPS      edx
 #define reg_FSRM               edx
+#define reg_AVX512_VP2INTERSECT        edx
+#define reg_MD_CLEAR           edx
+#define reg_SERIALIZE          edx
+#define reg_HYBRID             edx
+#define reg_TSXLDTRK           edx
 #define reg_PCONFIG            edx
 #define reg_IBT                        edx
 #define reg_IBRS_IBPB          edx
 #define reg_STIBP              edx
-#define reg_CAPABILITIES       edx
+#define reg_L1D_FLUSH          edx
+#define reg_ARCH_CAPABILITIES  edx
+#define reg_CORE_CAPABILITIES  edx
 #define reg_SSBD               edx
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -881,6 +756,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define reg_WBNOINVD           ebx
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define reg_AVX512_BF16                eax
+
 /* FEATURE_INDEX_2.  */
 #define bit_arch_I586                          (1u << 0)
 #define bit_arch_I686                          (1u << 1)
@@ -899,22 +779,39 @@ extern const struct cpu_features *__get_cpu_features (void)
 #define bit_arch_Prefer_No_AVX512              (1u << 14)
 #define bit_arch_MathVec_Prefer_No_AVX512      (1u << 15)
 
-#define index_arch_Fast_Rep_String             FEATURE_INDEX_2
-#define index_arch_Fast_Copy_Backward          FEATURE_INDEX_2
-#define index_arch_Slow_BSF                    FEATURE_INDEX_2
-#define index_arch_Fast_Unaligned_Load         FEATURE_INDEX_2
-#define index_arch_Prefer_PMINUB_for_stringop  FEATURE_INDEX_2
-#define index_arch_Fast_Unaligned_Copy         FEATURE_INDEX_2
-#define index_arch_I586                                FEATURE_INDEX_2
-#define index_arch_I686                                FEATURE_INDEX_2
-#define index_arch_Slow_SSE4_2                 FEATURE_INDEX_2
-#define index_arch_AVX_Fast_Unaligned_Load     FEATURE_INDEX_2
-#define index_arch_Prefer_MAP_32BIT_EXEC       FEATURE_INDEX_2
-#define index_arch_Prefer_No_VZEROUPPER                FEATURE_INDEX_2
-#define index_arch_Prefer_ERMS                 FEATURE_INDEX_2
-#define index_arch_Prefer_No_AVX512            FEATURE_INDEX_2
-#define index_arch_MathVec_Prefer_No_AVX512    FEATURE_INDEX_2
-#define index_arch_Prefer_FSRM                 FEATURE_INDEX_2
+#define index_arch_Fast_Rep_String             PREFERRED_FEATURE_INDEX_1
+#define index_arch_Fast_Copy_Backward          PREFERRED_FEATURE_INDEX_1
+#define index_arch_Slow_BSF                    PREFERRED_FEATURE_INDEX_1
+#define index_arch_Fast_Unaligned_Load         PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_PMINUB_for_stringop  PREFERRED_FEATURE_INDEX_1
+#define index_arch_Fast_Unaligned_Copy         PREFERRED_FEATURE_INDEX_1
+#define index_arch_I586                                PREFERRED_FEATURE_INDEX_1
+#define index_arch_I686                                PREFERRED_FEATURE_INDEX_1
+#define index_arch_Slow_SSE4_2                 PREFERRED_FEATURE_INDEX_1
+#define index_arch_AVX_Fast_Unaligned_Load     PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_MAP_32BIT_EXEC       PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_No_VZEROUPPER                PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_ERMS                 PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_No_AVX512            PREFERRED_FEATURE_INDEX_1
+#define index_arch_MathVec_Prefer_No_AVX512    PREFERRED_FEATURE_INDEX_1
+#define index_arch_Prefer_FSRM                 PREFERRED_FEATURE_INDEX_1
+
+#define feature_Fast_Rep_String                        preferred
+#define feature_Fast_Copy_Backward             preferred
+#define feature_Slow_BSF                       preferred
+#define feature_Fast_Unaligned_Load            preferred
+#define feature_Prefer_PMINUB_for_stringop     preferred
+#define feature_Fast_Unaligned_Copy            preferred
+#define feature_I586                           preferred
+#define feature_I686                           preferred
+#define feature_Slow_SSE4_2                    preferred
+#define feature_AVX_Fast_Unaligned_Load                preferred
+#define feature_Prefer_MAP_32BIT_EXEC          preferred
+#define feature_Prefer_No_VZEROUPPER           preferred
+#define feature_Prefer_ERMS                    preferred
+#define feature_Prefer_No_AVX512               preferred
+#define feature_MathVec_Prefer_No_AVX512       preferred
+#define feature_Prefer_FSRM                    preferred
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state          (1u << 1)
index 38ad2c2..666ec57 100644 (file)
@@ -54,7 +54,7 @@ extern __typeof (memcmp) DEFAULT_MEMCMP;
   _Static_assert (sizeof (#name) - 1 == len, #name " != " #len);       \
   if (!DEFAULT_MEMCMP (f, #name, len))                                 \
     {                                                                  \
-      cpu_features->feature[index_arch_##name]                         \
+      cpu_features->feature_##name[index_arch_##name]                  \
        &= ~bit_arch_##name;                                            \
       break;                                                           \
     }
@@ -66,10 +66,10 @@ extern __typeof (memcmp) DEFAULT_MEMCMP;
   if (!DEFAULT_MEMCMP (f, #name, len))                                 \
     {                                                                  \
       if (disable)                                                     \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          &= ~bit_arch_##name;                                          \
       else                                                             \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          |= bit_arch_##name;                                           \
       break;                                                           \
     }
@@ -82,10 +82,10 @@ extern __typeof (memcmp) DEFAULT_MEMCMP;
   if (!DEFAULT_MEMCMP (f, #name, len))                                 \
     {                                                                  \
       if (disable)                                                     \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          &= ~bit_arch_##name;                                          \
       else if (CPU_FEATURES_ARCH_P (cpu_features, need))               \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          |= bit_arch_##name;                                           \
       break;                                                           \
     }
@@ -98,10 +98,10 @@ extern __typeof (memcmp) DEFAULT_MEMCMP;
   if (!DEFAULT_MEMCMP (f, #name, len))                                 \
     {                                                                  \
       if (disable)                                                     \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          &= ~bit_arch_##name;                                          \
       else if (CPU_FEATURES_CPU_P (cpu_features, need))                        \
-       cpu_features->feature[index_arch_##name]                        \
+       cpu_features->feature_##name[index_arch_##name]                 \
          |= bit_arch_##name;                                           \
       break;                                                           \
     }
index 0dcb906..c60918c 100644 (file)
@@ -174,15 +174,24 @@ do_test (void)
   CHECK_CPU_FEATURE (CLDEMOTE);
   CHECK_CPU_FEATURE (MOVDIRI);
   CHECK_CPU_FEATURE (MOVDIR64B);
+  CHECK_CPU_FEATURE (ENQCMD);
   CHECK_CPU_FEATURE (SGX_LC);
+  CHECK_CPU_FEATURE (PKS);
   CHECK_CPU_FEATURE (AVX512_4VNNIW);
   CHECK_CPU_FEATURE (AVX512_4FMAPS);
   CHECK_CPU_FEATURE (FSRM);
+  CHECK_CPU_FEATURE (AVX512_VP2INTERSECT);
+  CHECK_CPU_FEATURE (MD_CLEAR);
+  CHECK_CPU_FEATURE (SERIALIZE);
+  CHECK_CPU_FEATURE (HYBRID);
+  CHECK_CPU_FEATURE (TSXLDTRK);
   CHECK_CPU_FEATURE (PCONFIG);
   CHECK_CPU_FEATURE (IBT);
   CHECK_CPU_FEATURE (IBRS_IBPB);
   CHECK_CPU_FEATURE (STIBP);
-  CHECK_CPU_FEATURE (CAPABILITIES);
+  CHECK_CPU_FEATURE (L1D_FLUSH);
+  CHECK_CPU_FEATURE (ARCH_CAPABILITIES);
+  CHECK_CPU_FEATURE (CORE_CAPABILITIES);
   CHECK_CPU_FEATURE (SSBD);
   CHECK_CPU_FEATURE (LAHF64_SAHF64);
   CHECK_CPU_FEATURE (SVM);
@@ -204,84 +213,36 @@ do_test (void)
   CHECK_CPU_FEATURE (XSAVES);
   CHECK_CPU_FEATURE (INVARIANT_TSC);
   CHECK_CPU_FEATURE (WBNOINVD);
+  CHECK_CPU_FEATURE (AVX512_BF16);
 
   printf ("Usable CPU features:\n");
-  CHECK_CPU_FEATURE_USABLE (SSE3);
-  CHECK_CPU_FEATURE_USABLE (PCLMULQDQ);
-  CHECK_CPU_FEATURE_USABLE (SSSE3);
   CHECK_CPU_FEATURE_USABLE (FMA);
-  CHECK_CPU_FEATURE_USABLE (CMPXCHG16B);
-  CHECK_CPU_FEATURE_USABLE (SSE4_1);
-  CHECK_CPU_FEATURE_USABLE (SSE4_2);
-  CHECK_CPU_FEATURE_USABLE (MOVBE);
-  CHECK_CPU_FEATURE_USABLE (POPCNT);
-  CHECK_CPU_FEATURE_USABLE (AES);
-  CHECK_CPU_FEATURE_USABLE (XSAVE);
-  CHECK_CPU_FEATURE_USABLE (OSXSAVE);
   CHECK_CPU_FEATURE_USABLE (AVX);
   CHECK_CPU_FEATURE_USABLE (F16C);
-  CHECK_CPU_FEATURE_USABLE (RDRAND);
-  CHECK_CPU_FEATURE_USABLE (FPU);
-  CHECK_CPU_FEATURE_USABLE (TSC);
-  CHECK_CPU_FEATURE_USABLE (MSR);
-  CHECK_CPU_FEATURE_USABLE (CX8);
-  CHECK_CPU_FEATURE_USABLE (SEP);
-  CHECK_CPU_FEATURE_USABLE (CMOV);
-  CHECK_CPU_FEATURE_USABLE (CLFSH);
-  CHECK_CPU_FEATURE_USABLE (MMX);
-  CHECK_CPU_FEATURE_USABLE (FXSR);
-  CHECK_CPU_FEATURE_USABLE (SSE);
-  CHECK_CPU_FEATURE_USABLE (SSE2);
-  CHECK_CPU_FEATURE_USABLE (FSGSBASE);
-  CHECK_CPU_FEATURE_USABLE (BMI1);
-  CHECK_CPU_FEATURE_USABLE (HLE);
   CHECK_CPU_FEATURE_USABLE (AVX2);
-  CHECK_CPU_FEATURE_USABLE (BMI2);
-  CHECK_CPU_FEATURE_USABLE (ERMS);
   CHECK_CPU_FEATURE_USABLE (AVX512F);
   CHECK_CPU_FEATURE_USABLE (AVX512DQ);
-  CHECK_CPU_FEATURE_USABLE (RDSEED);
-  CHECK_CPU_FEATURE_USABLE (ADX);
   CHECK_CPU_FEATURE_USABLE (AVX512_IFMA);
-  CHECK_CPU_FEATURE_USABLE (CLFLUSHOPT);
-  CHECK_CPU_FEATURE_USABLE (CLWB);
   CHECK_CPU_FEATURE_USABLE (AVX512PF);
   CHECK_CPU_FEATURE_USABLE (AVX512ER);
   CHECK_CPU_FEATURE_USABLE (AVX512CD);
-  CHECK_CPU_FEATURE_USABLE (SHA);
   CHECK_CPU_FEATURE_USABLE (AVX512BW);
   CHECK_CPU_FEATURE_USABLE (AVX512VL);
-  CHECK_CPU_FEATURE_USABLE (PREFETCHWT1);
   CHECK_CPU_FEATURE_USABLE (AVX512_VBMI);
+  CHECK_CPU_FEATURE_USABLE (PKU);
   CHECK_CPU_FEATURE_USABLE (AVX512_VBMI2);
-  CHECK_CPU_FEATURE_USABLE (GFNI);
   CHECK_CPU_FEATURE_USABLE (VAES);
   CHECK_CPU_FEATURE_USABLE (VPCLMULQDQ);
   CHECK_CPU_FEATURE_USABLE (AVX512_VNNI);
   CHECK_CPU_FEATURE_USABLE (AVX512_BITALG);
   CHECK_CPU_FEATURE_USABLE (AVX512_VPOPCNTDQ);
-  CHECK_CPU_FEATURE_USABLE (RDPID);
-  CHECK_CPU_FEATURE_USABLE (CLDEMOTE);
-  CHECK_CPU_FEATURE_USABLE (MOVDIRI);
-  CHECK_CPU_FEATURE_USABLE (MOVDIR64B);
   CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
   CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
-  CHECK_CPU_FEATURE_USABLE (FSRM);
-  CHECK_CPU_FEATURE_USABLE (LAHF64_SAHF64);
-  CHECK_CPU_FEATURE_USABLE (LZCNT);
-  CHECK_CPU_FEATURE_USABLE (SSE4A);
-  CHECK_CPU_FEATURE_USABLE (PREFETCHW);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
   CHECK_CPU_FEATURE_USABLE (XOP);
   CHECK_CPU_FEATURE_USABLE (FMA4);
-  CHECK_CPU_FEATURE_USABLE (TBM);
-  CHECK_CPU_FEATURE_USABLE (SYSCALL_SYSRET);
-  CHECK_CPU_FEATURE_USABLE (RDTSCP);
-  CHECK_CPU_FEATURE_USABLE (XSAVEOPT);
   CHECK_CPU_FEATURE_USABLE (XSAVEC);
-  CHECK_CPU_FEATURE_USABLE (XGETBV_ECX_1);
-  CHECK_CPU_FEATURE_USABLE (XSAVES);
-  CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
-  CHECK_CPU_FEATURE_USABLE (WBNOINVD);
+  CHECK_CPU_FEATURE_USABLE (AVX512_BF16);
 
   return 0;
 }