x86: Add support for AVX10 preset and vec size in cpu-features
author Noah Goldstein <goldstein.w.n@gmail.com>
Wed, 20 Sep 2023 20:44:50 +0000 (15:44 -0500)
committer Noah Goldstein <goldstein.w.n@gmail.com>
Fri, 29 Sep 2023 19:18:42 +0000 (14:18 -0500)
This commit adds support for the new AVX10 CPU features:
https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf

We add checks for:
    - `AVX10`: Check if AVX10 is present.
    - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.

`make check` passes and cpuid output was checked against GNR/DMR on an
emulator.

manual/platform.texi
sysdeps/x86/bits/platform/x86.h
sysdeps/x86/cpu-features.c
sysdeps/x86/include/cpu-features.h
sysdeps/x86/tst-get-cpu-features.c

index 2a2d557067ad60910ebec4d47734b00094180679..478b6fdcdfa1377201d8acd7671244e48e1d9e4e 100644 (file)
@@ -222,6 +222,18 @@ Leaf (EAX = 23H).
 @item
 @code{AVX} -- The AVX instruction extensions.
 
+@item
+@code{AVX10} -- The AVX10 instruction extensions.
+
+@item
+@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
+
+@item
+@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
+
+@item
+@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
+
 @item
 @code{AVX2} -- The AVX2 instruction extensions.
 
index 88ca071aa7f6c15d6d2567a8d74bdd5c21884e00..1e23d53ba2c4a1ef4bc35998737fdb9a1046be1d 100644 (file)
@@ -30,7 +30,8 @@ enum
   CPUID_INDEX_80000008,
   CPUID_INDEX_7_ECX_1,
   CPUID_INDEX_19,
-  CPUID_INDEX_14_ECX_0
+  CPUID_INDEX_14_ECX_0,
+  CPUID_INDEX_24_ECX_0
 };
 
 struct cpuid_feature
@@ -312,6 +313,7 @@ enum
   x86_cpu_AVX_NE_CONVERT       = x86_cpu_index_7_ecx_1_edx + 5,
   x86_cpu_AMX_COMPLEX          = x86_cpu_index_7_ecx_1_edx + 8,
   x86_cpu_PREFETCHI            = x86_cpu_index_7_ecx_1_edx + 14,
+  x86_cpu_AVX10                        = x86_cpu_index_7_ecx_1_edx + 19,
   x86_cpu_APX_F                        = x86_cpu_index_7_ecx_1_edx + 21,
 
   x86_cpu_index_19_ebx
@@ -325,5 +327,13 @@ enum
     = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
        + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
 
-  x86_cpu_PTWRITE              = x86_cpu_index_14_ecx_0_ebx + 4
+  x86_cpu_PTWRITE              = x86_cpu_index_14_ecx_0_ebx + 4,
+
+  x86_cpu_index_24_ecx_0_ebx
+    = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+       + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
+
+  x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
+  x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
+  x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
 };
index badf088874703721e1b1f830734f6493b2aa072f..0bf923d48b92c04d7e49bda77710798d8f72a8d2 100644 (file)
@@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
   CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
 #endif
 
+  enum
+  {
+    os_xmm = 1,
+    os_ymm = 2,
+    os_zmm = 4
+  } os_vector_size = os_xmm;
   /* Can we call xgetbv?  */
   if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
     {
       unsigned int xcrlow;
       unsigned int xcrhigh;
+      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
       asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
       /* Is YMM and XMM state usable?  */
       if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
+             os_vector_size |= os_ymm;
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
@@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
+             os_vector_size |= os_zmm;
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
@@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
            }
        }
 
+      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+         && cpu_features->basic.max_cpuid >= 0x24)
+       {
+         __cpuid_count (
+             0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+             cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+             cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+             cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+         if (os_vector_size & os_xmm)
+           CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
+         if (os_vector_size & os_ymm)
+           CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
+         if (os_vector_size & os_zmm)
+           CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
+       }
+
       /* Are XTILECFG and XTILEDATA states usable?  */
       if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
index eb30d342a65ecc736e1bc96093b0d757347ec3e3..2d7427a6c010d6c469f782852b1958abb334ca56 100644 (file)
@@ -29,7 +29,7 @@
 
 enum
 {
-  CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+  CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
 };
 
 enum
@@ -319,6 +319,7 @@ enum
 #define bit_cpu_AVX_NE_CONVERT (1u << 5)
 #define bit_cpu_AMX_COMPLEX    (1u << 8)
 #define bit_cpu_PREFETCHI      (1u << 14)
+#define bit_cpu_AVX10          (1u << 19)
 #define bit_cpu_APX_F          (1u << 21)
 
 /* CPUID_INDEX_19.  */
@@ -332,6 +333,13 @@ enum
 /* EBX.  */
 #define bit_cpu_PTWRITE                (1u << 4)
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define bit_cpu_AVX10_XMM              (1u << 16)
+#define bit_cpu_AVX10_YMM              (1u << 17)
+#define bit_cpu_AVX10_ZMM              (1u << 18)
+
 /* CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -563,6 +571,7 @@ enum
 #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
 #define index_cpu_AMX_COMPLEX  CPUID_INDEX_7_ECX_1
 #define index_cpu_PREFETCHI    CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10                CPUID_INDEX_7_ECX_1
 #define index_cpu_APX_F                CPUID_INDEX_7_ECX_1
 
 /* CPUID_INDEX_19.  */
@@ -576,6 +585,13 @@ enum
 /* EBX.  */
 #define index_cpu_PTWRITE      CPUID_INDEX_14_ECX_0
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define index_cpu_AVX10_XMM    CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_YMM    CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_ZMM    CPUID_INDEX_24_ECX_0
+
 /* CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -809,6 +825,7 @@ enum
 #define reg_AVX_NE_CONVERT     edx
 #define reg_AMX_COMPLEX                edx
 #define reg_PREFETCHI          edx
+#define reg_AVX10              edx
 #define reg_APX_F              edx
 
 /* CPUID_INDEX_19.  */
@@ -822,6 +839,14 @@ enum
 /* EBX.  */
 #define reg_PTWRITE            ebx
 
+/* CPUID_INDEX_24_ECX_0.  */
+
+/* EBX.  */
+#define reg_AVX10_XMM          ebx
+#define reg_AVX10_YMM          ebx
+#define reg_AVX10_ZMM          ebx
+
+
 /* PREFERRED_FEATURE_INDEX_1.  First define the bitindex values
    sequentially, then define the bit_arch* and index_arch_* lookup
    constants.  */
index b27fa7324a911db7921e531823fe317675835b5f..44edd18df2c6f2cc53325237717f2eff353d3f83 100644 (file)
@@ -219,6 +219,7 @@ do_test (void)
   CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
   CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+  CHECK_CPU_FEATURE_PRESENT (AVX10);
   CHECK_CPU_FEATURE_PRESENT (APX_F);
   CHECK_CPU_FEATURE_PRESENT (AESKLE);
   CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@ do_test (void)
   CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
   CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+  CHECK_CPU_FEATURE_ACTIVE (AVX10);
   CHECK_CPU_FEATURE_ACTIVE (APX_F);
   CHECK_CPU_FEATURE_ACTIVE (AESKLE);
   CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
   CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
 
+  if (CPU_FEATURE_ACTIVE (AVX10))
+    {
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
+      CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
+    }
   return 0;
 }