drm/amdkfd: Add topology support for CPUs
author Felix Kuehling <Felix.Kuehling@amd.com>
Sat, 9 Dec 2017 04:08:58 +0000 (23:08 -0500)
committer Oded Gabbay <oded.gabbay@gmail.com>
Sat, 9 Dec 2017 04:08:58 +0000 (23:08 -0500)
Currently, the KFD topology information is generated by parsing the CRAT
(ACPI) table. However, at present the CRAT table is available only on AMD
APUs. To support CPUs on systems without a CRAT table, the KFD driver will
create a Virtual CRAT (VCRAT) table, which the existing code will then
parse to generate the topology.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.h
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.h

index e62493b..c8afbf8 100644 (file)
  */
 #include <linux/acpi.h>
 #include "kfd_crat.h"
+#include "kfd_priv.h"
 #include "kfd_topology.h"
 
-extern struct kfd_system_properties sys_props;
-
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
                struct crat_subtype_computeunit *cu)
 {
@@ -281,7 +280,7 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
                         uint32_t proximity_domain)
 {
-       struct kfd_topology_device *top_dev;
+       struct kfd_topology_device *top_dev = NULL;
        struct crat_subtype_generic *sub_type_hdr;
        uint16_t node_id;
        int ret = 0;
@@ -314,10 +313,10 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
                goto err;
        }
 
-       sys_props.platform_id =
-               (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK;
-       sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id);
-       sys_props.platform_rev = crat_table->revision;
+       memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
+       memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
+                       CRAT_OEMTABLEID_LENGTH);
+       top_dev->oem_revision = crat_table->oem_revision;
 
        sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
        while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
@@ -385,8 +384,312 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
        return 0;
 }
 
-/*
- * kfd_destroy_crat_image
+/* Memory required to create Virtual CRAT.
+ * Since there is no easy way to predict the amount of memory required, the
+ * following amounts are allocated for the CPU and GPU Virtual CRAT. They are
+ * expected to cover all known conditions, but to be safe an additional check
+ * in the code ensures we never write past the allocation.
+ */
+#define VCRAT_SIZE_FOR_CPU     (2 * PAGE_SIZE)
+#define VCRAT_SIZE_FOR_GPU     (3 * PAGE_SIZE)
+
+/* kfd_fill_cu_for_cpu - Emit the Compute Unit subtype for one CPU NUMA node
+ *
+ *     @numa_node_id: CPU NUMA node id
+ *     @avail_size: [IN/OUT] bytes still free in the image; reduced by the
+ *             size of one compute unit entry
+ *     @proximity_domain: proximity domain recorded in the entry
+ *     @sub_type_hdr: Memory into which compute info will be filled in
+ *
+ *     Return 0 if successful, -ENOMEM if the entry does not fit in
+ *     @avail_size, -EINVAL if kfd_numa_node_to_apic_id() reports -1
+ */
+static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+                               int proximity_domain,
+                               struct crat_subtype_computeunit *sub_type_hdr)
+{
+       const struct cpumask *node_cpus;
+
+       /* Account for the entry first; bail out before writing if it
+        * does not fit.
+        */
+       *avail_size -= sizeof(*sub_type_hdr);
+       if (*avail_size < 0)
+               return -ENOMEM;
+
+       memset(sub_type_hdr, 0, sizeof(*sub_type_hdr));
+
+       /* Subtype header: an enabled entry that describes a CPU */
+       sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+       sub_type_hdr->length = sizeof(*sub_type_hdr);
+       sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED |
+                               CRAT_CU_FLAGS_CPU_PRESENT;
+
+       node_cpus = cpumask_of_node(numa_node_id);
+
+       /* CU payload */
+       sub_type_hdr->proximity_domain = proximity_domain;
+       sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
+       if (sub_type_hdr->processor_id_low == -1)
+               return -EINVAL;
+
+       sub_type_hdr->num_cpu_cores = cpumask_weight(node_cpus);
+
+       return 0;
+}
+
+/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
+ *
+ *     @numa_node_id: CPU NUMA node id
+ *     @avail_size: [IN/OUT] bytes still free in the image; reduced by the
+ *             size of one memory entry
+ *     @proximity_domain: proximity domain recorded in the entry
+ *     @sub_type_hdr: Memory into which memory info will be filled in
+ *
+ *     Return 0 if successful, -ENOMEM if the entry does not fit in
+ *     @avail_size
+ */
+static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+                       int proximity_domain,
+                       struct crat_subtype_memory *sub_type_hdr)
+{
+       uint64_t node_pages = 0;
+       uint64_t node_bytes;
+       pg_data_t *pgdat;
+       int zt;
+
+       *avail_size -= sizeof(*sub_type_hdr);
+       if (*avail_size < 0)
+               return -ENOMEM;
+
+       memset(sub_type_hdr, 0, sizeof(*sub_type_hdr));
+
+       /* Subtype header */
+       sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+       sub_type_hdr->length = sizeof(*sub_type_hdr);
+       sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+       /* Memory Subunit data.
+        *
+        * Unlike si_meminfo, si_meminfo_node is not exported. So
+        * the page accounting below is duplicated from the
+        * si_meminfo_node function: sum the managed pages of every
+        * zone of the node, then convert pages to bytes.
+        */
+       pgdat = NODE_DATA(numa_node_id);
+       for (zt = 0; zt < MAX_NR_ZONES; zt++)
+               node_pages += pgdat->node_zones[zt].managed_pages;
+       node_bytes = node_pages << PAGE_SHIFT;
+
+       sub_type_hdr->proximity_domain = proximity_domain;
+       sub_type_hdr->length_low = lower_32_bits(node_bytes);
+       sub_type_hdr->length_high = upper_32_bits(node_bytes);
+
+       return 0;
+}
+
+/* kfd_fill_iolink_info_for_cpu - Fill in IO link info from the given CPU
+ *     NUMA node to every other online node
+ *
+ *     @numa_node_id: CPU NUMA node id the links start from
+ *     @avail_size: [IN/OUT] bytes still free in the image; reduced by the
+ *             size of one IO link entry per link
+ *     @num_entries: [OUT] number of IO link entries written so far
+ *     @sub_type_hdr: Memory into which the IO link entries will be filled
+ *             in, one after the other
+ *
+ *     Return 0 if successful, -ENOMEM if an entry does not fit in
+ *     @avail_size
+ */
+static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
+                               uint32_t *num_entries,
+                               struct crat_subtype_iolink *sub_type_hdr)
+{
+       struct crat_subtype_iolink *link = sub_type_hdr;
+       uint8_t link_type;
+       int nid;
+
+       /* The link type is chosen from the vendor of CPU 0 */
+       link_type = (cpu_data(0).x86_vendor == X86_VENDOR_AMD) ?
+                       CRAT_IOLINK_TYPE_HYPERTRANSPORT :
+                       CRAT_IOLINK_TYPE_QPI_1_1;
+
+       *num_entries = 0;
+
+       /* Create IO links from this node to other CPU nodes */
+       for_each_online_node(nid) {
+               /* No link from the node to itself */
+               if (nid == numa_node_id)
+                       continue;
+
+               *avail_size -= sizeof(*link);
+               if (*avail_size < 0)
+                       return -ENOMEM;
+
+               memset(link, 0, sizeof(*link));
+
+               /* Subtype header */
+               link->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+               link->length = sizeof(*link);
+               link->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+               /* IO link payload */
+               link->proximity_domain_from = numa_node_id;
+               link->proximity_domain_to = nid;
+               link->io_interface_type = link_type;
+
+               *num_entries += 1;
+               link++;
+       }
+
+       return 0;
+}
+
+/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
+ *
+ *     @pcrat_image: Fill in VCRAT for CPU
+ *     @size:  [IN] allocated size of crat_image.
+ *             [OUT] actual size of data filled in crat_image
+ *
+ *     Return 0 if successful. Return -EINVAL if @pcrat_image is NULL or
+ *     the buffer is smaller than VCRAT_SIZE_FOR_CPU, -ENOMEM if the
+ *     header does not fit, or the error returned by one of the
+ *     kfd_fill_*_for_cpu helpers.
+ */
+static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+{
+       struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+       struct acpi_table_header *acpi_table;
+       acpi_status status;
+       struct crat_subtype_generic *sub_type_hdr;
+       int avail_size = *size;
+       int numa_node_id;
+       uint32_t entries = 0;
+       int ret = 0;
+
+       if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
+               return -EINVAL;
+
+       /* Fill in CRAT Header.
+        * Modify length and total_entries as subunits are added.
+        */
+       avail_size -= sizeof(struct crat_header);
+       if (avail_size < 0)
+               return -ENOMEM;
+
+       memset(crat_table, 0, sizeof(struct crat_header));
+       memcpy(&crat_table->signature, CRAT_SIGNATURE,
+                       sizeof(crat_table->signature));
+       crat_table->length = sizeof(struct crat_header);
+
+       /* OEM information is taken from the DSDT. Treat every failure
+        * status as "no table": acpi_table is only valid when the lookup
+        * succeeded, so it must not be dereferenced on any error.
+        */
+       status = acpi_get_table("DSDT", 0, &acpi_table);
+       if (status != AE_OK)
+               pr_warn("DSDT table not found for OEM information\n");
+       else {
+               crat_table->oem_revision = acpi_table->revision;
+               memcpy(crat_table->oem_id, acpi_table->oem_id,
+                               CRAT_OEMID_LENGTH);
+               memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
+                               CRAT_OEMTABLEID_LENGTH);
+       }
+       crat_table->total_entries = 0;
+       crat_table->num_domains = 0;
+
+       /* Subtype entries start right after the header */
+       sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+
+       for_each_online_node(numa_node_id) {
+               /* Skip nodes for which no APIC ID can be determined */
+               if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
+                       continue;
+
+               /* Fill in Subtype: Compute Unit */
+               ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
+                       crat_table->num_domains,
+                       (struct crat_subtype_computeunit *)sub_type_hdr);
+               if (ret < 0)
+                       return ret;
+               crat_table->length += sub_type_hdr->length;
+               crat_table->total_entries++;
+
+               sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+                       sub_type_hdr->length);
+
+               /* Fill in Subtype: Memory */
+               ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
+                       crat_table->num_domains,
+                       (struct crat_subtype_memory *)sub_type_hdr);
+               if (ret < 0)
+                       return ret;
+               crat_table->length += sub_type_hdr->length;
+               crat_table->total_entries++;
+
+               sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+                       sub_type_hdr->length);
+
+               /* Fill in Subtype: IO Link */
+               ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
+                               &entries,
+                               (struct crat_subtype_iolink *)sub_type_hdr);
+               if (ret < 0)
+                       return ret;
+
+               /* Every IO link entry has the size of
+                * struct crat_subtype_iolink. Use that size instead of
+                * sub_type_hdr->length: when no entry was written
+                * (entries == 0) the slot at sub_type_hdr has not been
+                * initialized and must not be read.
+                */
+               crat_table->length +=
+                       sizeof(struct crat_subtype_iolink) * entries;
+               crat_table->total_entries += entries;
+
+               sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+                               sizeof(struct crat_subtype_iolink) * entries);
+
+               crat_table->num_domains++;
+       }
+
+       /* TODO: Add cache Subtype for CPU.
+        * Currently, CPU cache information is available in function
+        * detect_cache_attributes(cpu) defined in the file
+        * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
+        * exported and to get the same information the code needs to be
+        * duplicated.
+        */
+
+       *size = crat_table->length;
+       pr_info("Virtual CRAT table created for CPU\n");
+
+       return 0;
+}
+
+/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
+ *             creates a Virtual CRAT (VCRAT) image
+ *
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ *     @crat_image: VCRAT image created because ACPI does not have a
+ *                  CRAT for this device. Set to NULL on failure.
+ *     @size: [OUT] size of virtual crat_image
+ *     @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+ *             COMPUTE_UNIT_GPU - Create VCRAT for GPU
+ *             (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+ *                     -- this option is not currently implemented.
+ *                     The assumption is that all AMD APUs will have CRAT
+ *     @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *             (not referenced yet, the GPU path is not implemented)
+ *     @proximity_domain: not referenced by the paths implemented so far
+ *
+ *     Return 0 if successful else return -ve value
+ */
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+                                 int flags, struct kfd_dev *kdev,
+                                 uint32_t proximity_domain)
+{
+       void *pcrat_image = NULL;
+       int ret = 0;
+
+       /* @size is dereferenced on the CPU path, so reject a NULL pointer
+        * up front just like @crat_image.
+        */
+       if (!crat_image || !size)
+               return -EINVAL;
+
+       *crat_image = NULL;
+
+       /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and
+        * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover
+        * all the current conditions. A check is put not to overwrite beyond
+        * allocated size
+        */
+       switch (flags) {
+       case COMPUTE_UNIT_CPU:
+               pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
+               if (!pcrat_image)
+                       return -ENOMEM;
+               *size = VCRAT_SIZE_FOR_CPU;
+               ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
+               break;
+       case COMPUTE_UNIT_GPU:
+               /* TODO: */
+               ret = -EINVAL;
+               pr_err("VCRAT not implemented for dGPU\n");
+               break;
+       case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
+               /* TODO: */
+               ret = -EINVAL;
+               pr_err("VCRAT not implemented for APU\n");
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       /* Hand the image to the caller only on success; otherwise release
+        * it here (kfree of the still-NULL pointer is a no-op).
+        */
+       if (!ret)
+               *crat_image = pcrat_image;
+       else
+               kfree(pcrat_image);
+
+       return ret;
+}
+
+
+/* kfd_destroy_crat_image
  *
  *     @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
  *
index c15adbd..1711ab6 100644 (file)
 
 #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
 
+/* Compute Unit flags */
+#define COMPUTE_UNIT_CPU       (1 << 0)  /* Create Virtual CRAT for CPU */
+#define COMPUTE_UNIT_GPU       (1 << 1)  /* Create Virtual CRAT for GPU */
+
 struct crat_header {
        uint32_t        signature;
        uint32_t        length;
@@ -302,9 +306,14 @@ struct cdit_header {
 
 #pragma pack()
 
+struct kfd_dev;
+
 int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
 void kfd_destroy_crat_image(void *crat_image);
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
                         uint32_t proximity_domain);
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+                                 int flags, struct kfd_dev *kdev,
+                                 uint32_t proximity_domain);
 
 #endif /* KFD_CRAT_H_INCLUDED */
index 69a6206..aeee9d4 100644 (file)
@@ -671,6 +671,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
 
 /* Interrupts */
 int kfd_interrupt_init(struct kfd_dev *dev);
index 622feda..9aa6004 100644 (file)
@@ -28,6 +28,8 @@
 #include <linux/hash.h>
 #include <linux/cpufreq.h>
 #include <linux/log2.h>
+#include <linux/dmi.h>
+#include <linux/atomic.h>
 
 #include "kfd_priv.h"
 #include "kfd_crat.h"
 
 /* topology_device_list - Master list of all topology devices */
 static struct list_head topology_device_list;
-struct kfd_system_properties sys_props;
+static struct kfd_system_properties sys_props;
 
 static DECLARE_RWSEM(topology_lock);
+static atomic_t topology_crat_proximity_domain;
 
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
 {
@@ -691,12 +694,92 @@ static void kfd_topology_update_device_list(struct list_head *temp_list,
        }
 }
 
+/* kfd_debug_print_topology - Log which kind of node (APU, CPU or dGPU) is
+ *     at the tail of topology_device_list. Nothing is logged when the list
+ *     is empty. Takes topology_lock for reading.
+ */
+static void kfd_debug_print_topology(void)
+{
+       struct kfd_topology_device *dev;
+
+       down_read(&topology_lock);
+
+       /* list_last_entry() never yields NULL: on an empty list it returns
+        * a pointer derived from the list head itself. Emptiness therefore
+        * has to be tested explicitly before the entry is dereferenced.
+        */
+       if (list_empty(&topology_device_list))
+               goto out;
+
+       dev = list_last_entry(&topology_device_list,
+                       struct kfd_topology_device, list);
+
+       /* CPU cores and SIMDs on the same node are reported as an APU */
+       if (dev->node_props.cpu_cores_count &&
+                       dev->node_props.simd_count) {
+               pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+                       dev->node_props.device_id,
+                       dev->node_props.vendor_id);
+       } else if (dev->node_props.cpu_cores_count)
+               pr_info("Topology: Add CPU node\n");
+       else if (dev->node_props.simd_count)
+               pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+                       dev->node_props.device_id,
+                       dev->node_props.vendor_id);
+
+out:
+       up_read(&topology_lock);
+}
+
+/* Helper function for initializing platform_xx members of
+ * kfd_system_properties. Uses OEM info from the last node in
+ * topology_device_list; sys_props is left untouched when the list is
+ * empty. Takes topology_lock for reading.
+ */
+static void kfd_update_system_properties(void)
+{
+       struct kfd_topology_device *dev;
+       uint64_t platform_id = 0;
+       uint64_t platform_oem = 0;
+
+       down_read(&topology_lock);
+
+       /* list_last_entry() never yields NULL, so an empty list has to be
+        * detected explicitly before the entry is dereferenced.
+        */
+       if (!list_empty(&topology_device_list)) {
+               dev = list_last_entry(&topology_device_list,
+                               struct kfd_topology_device, list);
+
+               /* Copy the byte arrays instead of casting them to
+                * uint64_t *: the arrays are not guaranteed to be 8-byte
+                * aligned, and oem_id is shorter than 8 bytes, so a
+                * 64-bit load would read past its end.
+                */
+               memcpy(&platform_id, dev->oem_id,
+                       min_t(size_t, sizeof(platform_id),
+                               sizeof(dev->oem_id)));
+               memcpy(&platform_oem, dev->oem_table_id,
+                       min_t(size_t, sizeof(platform_oem),
+                               sizeof(dev->oem_table_id)));
+
+               sys_props.platform_id = platform_id & CRAT_OEMID_64BIT_MASK;
+               sys_props.platform_oem = platform_oem;
+               sys_props.platform_rev = dev->oem_revision;
+       }
+       up_read(&topology_lock);
+}
+
+/* find_system_memory - dmi_walk() callback that copies memory width and
+ *     clock from a DMI memory device structure into every entry of the
+ *     topology device's mem_props list
+ *
+ *     @dm: DMI structure currently being visited
+ *     @private: the struct kfd_topology_device to update
+ *
+ *     Structures of another type, or too short to contain both fields,
+ *     are ignored.
+ */
+static void find_system_memory(const struct dmi_header *dm,
+       void *private)
+{
+       struct kfd_mem_properties *mem;
+       u16 mem_width, mem_clock;
+       struct kfd_topology_device *kdev =
+               (struct kfd_topology_device *)private;
+       const u8 *dmi_data = (const u8 *)(dm + 1);
+
+       /* dmi_data points just past the DMI header, while dm->length
+        * counts from the start of the header. The clock field is the
+        * furthest one read (dmi_data + 0x11, two bytes), so the structure
+        * must be at least that long or the read runs past its formatted
+        * area.
+        */
+       if (dm->type == DMI_ENTRY_MEM_DEVICE &&
+           dm->length >= sizeof(*dm) + 0x11 + sizeof(mem_clock)) {
+               mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
+               mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
+               list_for_each_entry(mem, &kdev->mem_props, list) {
+                       /* 0 and 0xFFFF are not taken over as a width */
+                       if (mem_width != 0xFFFF && mem_width != 0)
+                               mem->width = mem_width;
+                       /* 0 is not taken over as a clock */
+                       if (mem_clock != 0)
+                               mem->mem_clk_max = mem_clock;
+               }
+       }
+}
+/* kfd_add_non_crat_information - Add information that is not currently
+ *     defined in CRAT but is necessary for KFD topology
+ * @kdev - topology device to which additional info is added
+ */
+static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
+{
+       /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
+       if (kdev->gpu)
+               return;
+
+       /* CPU only node: add system memory information */
+       dmi_walk(find_system_memory, kdev);
+}
+
 int kfd_topology_init(void)
 {
        void *crat_image = NULL;
        size_t image_size = 0;
        int ret;
        struct list_head temp_topology_device_list;
+       int cpu_only_node = 0;
+       struct kfd_topology_device *kdev;
+       int proximity_domain;
 
        /* topology_device_list - Master list of all topology devices
         * temp_topology_device_list - temporary list created while parsing CRAT
@@ -711,36 +794,78 @@ int kfd_topology_init(void)
 
        memset(&sys_props, 0, sizeof(sys_props));
 
+       /* Proximity domains in ACPI CRAT tables start counting at
+        * 0. The same should be true for virtual CRAT tables created
+        * at this stage. GPUs added later in kfd_topology_add_device
+        * use a counter.
+        */
+       proximity_domain = 0;
+
        /*
-        * Get the CRAT image from the ACPI
+        * Get the CRAT image from the ACPI. If ACPI doesn't have one
+        * create a virtual CRAT.
+        * NOTE: The current implementation expects all AMD APUs to have
+        *      CRAT. If no CRAT is available, it is assumed to be a CPU
         */
        ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
        if (!ret) {
                ret = kfd_parse_crat_table(crat_image,
-                                          &temp_topology_device_list, 0);
-               if (ret)
+                                          &temp_topology_device_list,
+                                          proximity_domain);
+               if (ret) {
+                       kfd_release_topology_device_list(
+                               &temp_topology_device_list);
+                       kfd_destroy_crat_image(crat_image);
+                       crat_image = NULL;
+               }
+       }
+
+       if (!crat_image) {
+               ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+                                                   COMPUTE_UNIT_CPU, NULL,
+                                                   proximity_domain);
+               cpu_only_node = 1;
+               if (ret) {
+                       pr_err("Error creating VCRAT table for CPU\n");
+                       return ret;
+               }
+
+               ret = kfd_parse_crat_table(crat_image,
+                                          &temp_topology_device_list,
+                                          proximity_domain);
+               if (ret) {
+                       pr_err("Error parsing VCRAT table for CPU\n");
                        goto err;
-       } else if (ret == -ENODATA) {
-               /* TODO: Create fake CRAT table */
-               ret = 0;
-               goto err;
-       } else {
-               pr_err("Couldn't get CRAT table size from ACPI\n");
-               goto err;
+               }
        }
 
        down_write(&topology_lock);
        kfd_topology_update_device_list(&temp_topology_device_list,
                                        &topology_device_list);
+       atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
        ret = kfd_topology_update_sysfs();
        up_write(&topology_lock);
 
        if (!ret) {
                sys_props.generation_count++;
+               kfd_update_system_properties();
+               kfd_debug_print_topology();
                pr_info("Finished initializing topology\n");
        } else
                pr_err("Failed to update topology in sysfs ret=%d\n", ret);
 
+       /* For nodes with GPU, this information gets added
+        * when GPU is detected (kfd_topology_add_device).
+        */
+       if (cpu_only_node) {
+               /* Add additional information to CPU only node created above */
+               down_write(&topology_lock);
+               kdev = list_first_entry(&topology_device_list,
+                               struct kfd_topology_device, list);
+               up_write(&topology_lock);
+               kfd_add_non_crat_information(kdev);
+       }
+
 err:
        kfd_destroy_crat_image(crat_image);
        return ret;
@@ -754,21 +879,6 @@ void kfd_topology_shutdown(void)
        up_write(&topology_lock);
 }
 
-static void kfd_debug_print_topology(void)
-{
-       struct kfd_topology_device *dev;
-       uint32_t i = 0;
-
-       pr_info("DEBUG PRINT OF TOPOLOGY:");
-       list_for_each_entry(dev, &topology_device_list, list) {
-               pr_info("Node: %d\n", i);
-               pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
-               pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
-               pr_info("\tSIMD count: %d\n", dev->node_props.simd_count);
-               i++;
-       }
-}
-
 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
 {
        uint32_t hashout;
@@ -954,6 +1064,34 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
 
 }
 
+/* kfd_cpumask_to_apic_id - Returns the APIC ID of the first CPU set in
+ *     the given cpumask
+ * Return -1 if the mask is NULL, is cpu_none_mask or has no CPU set
+ */
+static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+{
+       int cpu;
+
+       if (!cpumask || cpumask == cpu_none_mask)
+               return -1;
+
+       /* A result at or beyond nr_cpu_ids is treated as "no CPU" */
+       cpu = cpumask_first(cpumask);
+       if (cpu >= nr_cpu_ids)
+               return -1;
+
+       return cpu_data(cpu).apicid;
+}
+
+/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
+ *     of the given NUMA node (numa_node_id). A node id of -1 is looked up
+ *     in the online CPU mask instead, after logging a warning.
+ * Return -1 on failure
+ */
+int kfd_numa_node_to_apic_id(int numa_node_id)
+{
+       const struct cpumask *mask;
+
+       if (numa_node_id != -1) {
+               mask = cpumask_of_node(numa_node_id);
+       } else {
+               pr_warn("Invalid NUMA Node. Use online CPU mask\n");
+               mask = cpu_online_mask;
+       }
+
+       return kfd_cpumask_to_apic_id(mask);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
index 50a741b..8668189 100644 (file)
@@ -148,6 +148,9 @@ struct kfd_topology_device {
        struct attribute                attr_gpuid;
        struct attribute                attr_name;
        struct attribute                attr_props;
+       uint8_t                         oem_id[CRAT_OEMID_LENGTH];
+       uint8_t                         oem_table_id[CRAT_OEMTABLEID_LENGTH];
+       uint32_t                        oem_revision;
 };
 
 struct kfd_system_properties {