habanalabs/gaudi: add debugfs to fetch internal sync status
authorOhad Sharabi <osharabi@habana.ai>
Tue, 22 Mar 2022 12:32:40 +0000 (14:32 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 May 2022 18:57:37 +0000 (20:57 +0200)
When Gaudi device is secured the monitors data in the configuration
space is blocked from PCI access.
As we need to enable the user to read the sync-manager monitor registers
when debugging, this patch adds a debugfs node that dumps the information
to a binary file (blob).
When a root user triggers the dump, the driver sends a request to
the f/w to fill a data structure containing a dump of all monitor
registers.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Documentation/ABI/testing/debugfs-driver-habanalabs
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/include/common/cpucp_if.h

index bcf6915..84bf3da 100644 (file)
@@ -190,6 +190,30 @@ Description:    Check and display page fault or access violation mmu errors for
                 echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
                 cat /sys/kernel/debug/habanalabs/hl0/mmu_error
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
+Date:           Mar 2022
+KernelVersion:  5.19
+Contact:        osharabi@habana.ai
+Description:    Allows the root user to dump monitors status from the device's
+                protected config space.
+                This property is a binary blob that contains the result of the
+                monitors registers dump.
+                This custom interface is needed (instead of using the generic
+                Linux user-space PCI mapping) because this space is protected
+                and cannot be accessed using PCI read.
+                This interface doesn't support concurrency in the same device.
+                Only supported on GAUDI.
+
+What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
+Date:           Mar 2022
+KernelVersion:  5.19
+Contact:        osharabi@habana.ai
+Description:    Triggers dump of monitor data. The value to trigger the operation
+                must be 1. Triggering the monitor dump operation initiates dump of
+                current registers values of all monitors.
+                When the write is finished, the user can read the "monitor_dump"
+                blob.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
index f184955..30c637e 100644 (file)
@@ -829,23 +829,67 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
        }
 
        /* Free the previous allocation, if there was any */
-       entry->blob_desc.size = 0;
-       vfree(entry->blob_desc.data);
+       entry->data_dma_blob_desc.size = 0;
+       vfree(entry->data_dma_blob_desc.data);
 
-       entry->blob_desc.data = vmalloc(size);
-       if (!entry->blob_desc.data)
+       entry->data_dma_blob_desc.data = vmalloc(size);
+       if (!entry->data_dma_blob_desc.data)
                return -ENOMEM;
 
        rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
-                                               entry->blob_desc.data);
+                                               entry->data_dma_blob_desc.data);
        if (rc) {
                dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
-               vfree(entry->blob_desc.data);
-               entry->blob_desc.data = NULL;
+               vfree(entry->data_dma_blob_desc.data);
+               entry->data_dma_blob_desc.data = NULL;
                return -EIO;
        }
 
-       entry->blob_desc.size = size;
+       entry->data_dma_blob_desc.size = size;
+
+       return count;
+}
+
+/*
+ * hl_monitor_dump_trigger() - write handler of the "monitor_dump_trig" node.
+ * @f: the debugfs file; its inode private data is the device debugfs entry.
+ * @buf: user buffer holding a decimal number, which must be 1.
+ * @count: number of bytes in @buf.
+ * @ppos: file position (unused).
+ *
+ * Drops the previous monitor dump blob (if any), allocates a new buffer of
+ * sizeof(struct cpucp_monitor_dump) bytes and asks the ASIC-specific
+ * get_monitor_dump() callback to fill it. The blob size is published only
+ * after the callback has succeeded.
+ *
+ * Return: @count on success, 0 if the device is in reset, the parsing error
+ * if @buf is not a valid number, -EINVAL if the value is not 1, -ENOMEM if
+ * the allocation fails, -EIO if the ASIC callback fails.
+ */
+static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 size, trig;
+       ssize_t rc;
+
+       if (hdev->reset_info.in_reset) {
+               dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");
+               return 0;
+       }
+       rc = kstrtouint_from_user(buf, count, 10, &trig);
+       if (rc)
+               return rc;
+
+       if (trig != 1) {
+               dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");
+               return -EINVAL;
+       }
+
+       size = sizeof(struct cpucp_monitor_dump);
+
+       /* Free the previous allocation, if there was any */
+       entry->mon_dump_blob_desc.size = 0;
+       vfree(entry->mon_dump_blob_desc.data);
+
+       /*
+        * Use a zeroed allocation: the ASIC callback may return success
+        * without writing to the buffer (e.g. Gaudi when the CPU queue is not
+        * initialized), and the blob is readable from user-space, so it must
+        * never expose uninitialized kernel memory.
+        */
+       entry->mon_dump_blob_desc.data = vzalloc(size);
+       if (!entry->mon_dump_blob_desc.data)
+               return -ENOMEM;
+
+       rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to dump monitors\n");
+               vfree(entry->mon_dump_blob_desc.data);
+               entry->mon_dump_blob_desc.data = NULL;
+               return -EIO;
+       }
+
+       entry->mon_dump_blob_desc.size = size;
 
        return count;
 }
@@ -1235,6 +1279,11 @@ static const struct file_operations hl_dma_size_fops = {
        .write = hl_dma_size_write
 };
 
+/* Write-only file operations of the "monitor_dump_trig" debugfs node */
+static const struct file_operations hl_monitor_dump_fops = {
+       .owner = THIS_MODULE,
+       .write = hl_monitor_dump_trigger
+};
+
 static const struct file_operations hl_i2c_data_fops = {
        .owner = THIS_MODULE,
        .read = hl_i2c_data_read,
@@ -1350,8 +1399,10 @@ void hl_debugfs_add_device(struct hl_device *hdev)
        if (!dev_entry->entry_arr)
                return;
 
-       dev_entry->blob_desc.size = 0;
-       dev_entry->blob_desc.data = NULL;
+       dev_entry->data_dma_blob_desc.size = 0;
+       dev_entry->data_dma_blob_desc.data = NULL;
+       dev_entry->mon_dump_blob_desc.size = 0;
+       dev_entry->mon_dump_blob_desc.data = NULL;
 
        INIT_LIST_HEAD(&dev_entry->file_list);
        INIT_LIST_HEAD(&dev_entry->cb_list);
@@ -1470,7 +1521,18 @@ void hl_debugfs_add_device(struct hl_device *hdev)
        debugfs_create_blob("data_dma",
                                0400,
                                dev_entry->root,
-                               &dev_entry->blob_desc);
+                               &dev_entry->data_dma_blob_desc);
+
+       debugfs_create_file("monitor_dump_trig",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_monitor_dump_fops);
+
+       debugfs_create_blob("monitor_dump",
+                               0400,
+                               dev_entry->root,
+                               &dev_entry->mon_dump_blob_desc);
 
        debugfs_create_x8("skip_reset_on_timeout",
                                0644,
@@ -1509,7 +1571,8 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
 
        mutex_destroy(&entry->file_mutex);
 
-       vfree(entry->blob_desc.data);
+       vfree(entry->data_dma_blob_desc.data);
+       vfree(entry->mon_dump_blob_desc.data);
 
        for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
                vfree(entry->state_dump[i]);
index 42dce28..7d9d585 100644 (file)
@@ -821,6 +821,54 @@ out:
        return rc;
 }
 
+/*
+ * hl_fw_get_monitor_dump() - fetch the monitors registers dump from the F/W.
+ * @hdev: pointer to the habanalabs device structure.
+ * @data: destination buffer; sizeof(struct cpucp_monitor_dump) bytes are
+ *        written to it on success.
+ *
+ * Allocates a zeroed buffer from the CPU-accessible DMA pool, sends a
+ * CPUCP_PACKET_MONITOR_DUMP_GET packet that points at it and, on success,
+ * copies the buffer to @data one 32-bit word at a time, converting each word
+ * from little-endian to CPU byte order. The DMA buffer is always released
+ * before returning.
+ *
+ * Return: 0 on success, -ENOMEM if the DMA buffer allocation fails, otherwise
+ * the error returned by send_cpu_message().
+ */
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+       struct cpucp_monitor_dump *mon_dump_cpu_addr;
+       dma_addr_t mon_dump_dma_addr;
+       struct cpucp_packet pkt = {};
+       size_t data_size;
+       __le32 *src_ptr;
+       u32 *dst_ptr;
+       u64 result;
+       int i, rc;
+
+       data_size = sizeof(struct cpucp_monitor_dump);
+       mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size,
+                                                                               &mon_dump_dma_addr);
+       if (!mon_dump_cpu_addr) {
+               dev_err(hdev->dev,
+                       "Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
+               return -ENOMEM;
+       }
+
+       /* Zero the buffer so words the F/W does not write are read as 0 */
+       memset(mon_dump_cpu_addr, 0, data_size);
+
+       pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.addr = cpu_to_le64(mon_dump_dma_addr);
+       pkt.data_max_size = cpu_to_le32(data_size);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                                       HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
+               goto out;
+       }
+
+       /*
+        * NOTE(review): 'result' presumably holds the actual size written by
+        * the F/W, but it is not used here - the whole structure is always
+        * copied. Confirm this is intended.
+        */
+       src_ptr = (__le32 *) mon_dump_cpu_addr;
+       dst_ptr = data;
+       for (i = 0; i < (data_size / sizeof(u32)); i++) {
+               *dst_ptr = le32_to_cpu(*src_ptr);
+               src_ptr++;
+               dst_ptr++;
+       }
+
+out:
+       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
+
+       return rc;
+}
+
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
                struct hl_info_pci_counters *counters)
 {
index 1f7758f..ece83b2 100644 (file)
@@ -61,6 +61,7 @@
 
 #define HL_CPUCP_INFO_TIMEOUT_USEC     10000000 /* 10s */
 #define HL_CPUCP_EEPROM_TIMEOUT_USEC   10000000 /* 10s */
+#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */
 
 #define HL_FW_STATUS_POLL_INTERVAL_USEC                10000 /* 10ms */
 
@@ -1293,6 +1294,7 @@ struct fw_load_mgr {
  * @hw_queues_unlock: release H/W queues lock.
  * @get_pci_id: retrieve PCI ID.
  * @get_eeprom_data: retrieve EEPROM data from F/W.
+ * @get_monitor_dump: retrieve monitor registers dump from F/W.
  * @send_cpu_message: send message to F/W. If the message is timedout, the
  *                    driver will eventually reset the device. The timeout can
  *                    be determined by the calling function or it can be 0 and
@@ -1426,8 +1428,8 @@ struct hl_asic_funcs {
        void (*hw_queues_lock)(struct hl_device *hdev);
        void (*hw_queues_unlock)(struct hl_device *hdev);
        u32 (*get_pci_id)(struct hl_device *hdev);
-       int (*get_eeprom_data)(struct hl_device *hdev, void *data,
-                               size_t max_size);
+       int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
+       int (*get_monitor_dump)(struct hl_device *hdev, void *data);
        int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
                                u16 len, u32 timeout, u64 *result);
        int (*pci_bars_map)(struct hl_device *hdev);
@@ -2021,7 +2023,8 @@ struct hl_debugfs_entry {
  * @userptr_spinlock: protects userptr_list.
  * @ctx_mem_hash_list: list of available contexts with MMU mappings.
  * @ctx_mem_hash_spinlock: protects cb_list.
- * @blob_desc: descriptor of blob
+ * @data_dma_blob_desc: data DMA descriptor of blob.
+ * @mon_dump_blob_desc: monitor dump descriptor of blob.
  * @state_dump: data of the system states in case of a bad cs.
  * @state_dump_sem: protects state_dump.
  * @addr: next address to read/write from/to in read/write32.
@@ -2050,7 +2053,8 @@ struct hl_dbg_device_entry {
        spinlock_t                      userptr_spinlock;
        struct list_head                ctx_mem_hash_list;
        spinlock_t                      ctx_mem_hash_spinlock;
-       struct debugfs_blob_wrapper     blob_desc;
+       struct debugfs_blob_wrapper     data_dma_blob_desc;
+       struct debugfs_blob_wrapper     mon_dump_blob_desc;
        char                            *state_dump[HL_STATE_DUMP_HIST_LEN];
        struct rw_semaphore             state_dump_sem;
        u64                             addr;
@@ -3183,6 +3187,7 @@ int hl_fw_cpucp_handshake(struct hl_device *hdev,
                                u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
                                u32 boot_err1_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
                struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
index 2101abf..fdcdf47 100644 (file)
@@ -8500,6 +8500,16 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
        return hl_fw_get_eeprom_data(hdev, data, max_size);
 }
 
+/*
+ * Fetch the monitors registers dump from the F/W into @data.
+ *
+ * If the HW_CAP_CPU_Q capability bit is not set, this returns 0 WITHOUT
+ * writing anything to @data, so the caller's buffer keeps its previous
+ * content. Otherwise it returns the result of hl_fw_get_monitor_dump().
+ */
+static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+       struct gaudi_device *gaudi = hdev->asic_specific;
+
+       if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
+               return 0;
+
+       return hl_fw_get_monitor_dump(hdev, data);
+}
+
 /*
  * this function should be used only during initialization and/or after reset,
  * when there are no active users.
@@ -9459,6 +9469,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .hw_queues_unlock = gaudi_hw_queues_unlock,
        .get_pci_id = gaudi_get_pci_id,
        .get_eeprom_data = gaudi_get_eeprom_data,
+       .get_monitor_dump = gaudi_get_monitor_dump,
        .send_cpu_message = gaudi_send_cpu_message,
        .pci_bars_map = gaudi_pci_bars_map,
        .init_iatu = gaudi_init_iatu,
index bc8431e..36b3cf5 100644 (file)
@@ -5680,6 +5680,11 @@ static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_siz
        info->page_order_bitmask = 0;
 }
 
+/* Monitor dump is not implemented for Goya; always returns -EOPNOTSUPP */
+static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+       return -EOPNOTSUPP;
+}
+
 static int goya_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va,
                                        u64 size)
 {
@@ -5739,6 +5744,7 @@ static const struct hl_asic_funcs goya_funcs = {
        .hw_queues_unlock = goya_hw_queues_unlock,
        .get_pci_id = goya_get_pci_id,
        .get_eeprom_data = goya_get_eeprom_data,
+       .get_monitor_dump = goya_get_monitor_dump,
        .send_cpu_message = goya_send_cpu_message,
        .pci_bars_map = goya_pci_bars_map,
        .init_iatu = goya_init_iatu,
index 65668da..4af5bb6 100644 (file)
@@ -389,6 +389,14 @@ enum pq_init_status {
  *
  * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
  *       Packet to perform engine core ASID configuration
+ *
+ * CPUCP_PACKET_MONITOR_DUMP_GET -
+ *       Get monitors registers dump from the CpuCP kernel.
+ *       The CPU will put the registers dump in a buffer allocated by the driver,
+ *       whose address is passed via the CpuCp packet. In addition, the host's driver
+ *       passes the max size it allows the CpuCP to write to the structure, to prevent
+ *       data corruption in case of mismatched driver/FW versions.
+ *       Relevant only to Gaudi.
  */
 
 enum cpucp_packet_id {
@@ -439,6 +447,11 @@ enum cpucp_packet_id {
        CPUCP_PACKET_POWER_SET,                 /* internal */
        CPUCP_PACKET_RESERVED,                  /* not used */
        CPUCP_PACKET_ENGINE_CORE_ASID_SET,      /* internal */
+       CPUCP_PACKET_RESERVED2,                 /* not used */
+       CPUCP_PACKET_RESERVED3,                 /* not used */
+       CPUCP_PACKET_RESERVED4,                 /* not used */
+       CPUCP_PACKET_RESERVED5,                 /* not used */
+       CPUCP_PACKET_MONITOR_DUMP_GET,          /* debugfs */
 };
 
 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -889,4 +902,29 @@ struct cpucp_hbm_row_replaced_rows_info {
        struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
 };
 
+/*
+ * struct dcore_monitor_regs_data - DCORE monitor regs data.
+ * The structure follows the sync manager block layout. Relevant only to Gaudi.
+ * Every array holds 512 little-endian 32-bit entries.
+ * @mon_pay_addrl: array of payload address low bits.
+ * @mon_pay_addrh: array of payload address high bits.
+ * @mon_pay_data: array of payload data.
+ * @mon_arm: array of monitor arm.
+ * @mon_status: array of monitor status.
+ */
+struct dcore_monitor_regs_data {
+       __le32 mon_pay_addrl[512];
+       __le32 mon_pay_addrh[512];
+       __le32 mon_pay_data[512];
+       __le32 mon_arm[512];
+       __le32 mon_status[512];
+};
+
+/*
+ * struct cpucp_monitor_dump - monitors registers dump of all sync managers.
+ * Contains SM data for each SYNC_MNGR (relevant only to Gaudi).
+ * @sync_mngr_w_s: monitor regs data of the W_S sync manager.
+ * @sync_mngr_e_s: monitor regs data of the E_S sync manager.
+ * @sync_mngr_w_n: monitor regs data of the W_N sync manager.
+ * @sync_mngr_e_n: monitor regs data of the E_N sync manager.
+ */
+struct cpucp_monitor_dump {
+       struct dcore_monitor_regs_data sync_mngr_w_s;
+       struct dcore_monitor_regs_data sync_mngr_e_s;
+       struct dcore_monitor_regs_data sync_mngr_w_n;
+       struct dcore_monitor_regs_data sync_mngr_e_n;
+};
+
 #endif /* CPUCP_IF_H */