habanalabs: add debugfs write64/read64
authorMoti Haimovski <mhaimovski@habana.ai>
Tue, 12 Nov 2019 07:40:11 +0000 (09:40 +0200)
committerOded Gabbay <oded.gabbay@gmail.com>
Tue, 24 Mar 2020 08:54:16 +0000 (10:54 +0200)
Allow debug user to write/read 64-bit data through debugfs.
This will expedite the dump process of the (large) internal
memories of the device done during debug.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Documentation/ABI/testing/debugfs-driver-habanalabs
drivers/misc/habanalabs/debugfs.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h

index f0ac14b..a73601c 100644 (file)
@@ -43,6 +43,20 @@ Description:    Allows the root user to read or write directly through the
                 If the IOMMU is disabled, it also allows the root user to read
                 or write from the host a device VA of a host mapped memory
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/data64
+Date:           Jan 2020
+KernelVersion:  5.6
+Contact:        oded.gabbay@gmail.com
+Description:    Allows the root user to read or write 64 bit data directly
+                through the device's PCI bar. Writing to this file generates a
+                write transaction while reading from the file generates a read
+                transaction. This custom interface is needed (instead of using
+                the generic Linux user-space PCI mapping) because the DDR bar
+                is very small compared to the DDR memory and only the driver can
+                move the bar before and after the transaction.
+                If the IOMMU is disabled, it also allows the root user to read
+                or write from the host a device VA of a host mapped memory
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/device
 Date:           Jan 2019
 KernelVersion:  5.1
index 599d17d..756d36e 100644 (file)
@@ -710,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
        return count;
 }
 
+static ssize_t hl_data_read64(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[32];
+       u64 addr = entry->addr;
+       u64 val;
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+               return rc;
+       }
+
+       sprintf(tmp_buf, "0x%016llx\n", val);
+       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u64 addr = entry->addr;
+       u64 value;
+       ssize_t rc;
+
+       rc = kstrtoull_from_user(buf, count, 16, &value);
+       if (rc)
+               return rc;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+                       value, addr);
+               return rc;
+       }
+
+       return count;
+}
+
 static ssize_t hl_get_power_state(struct file *f, char __user *buf,
                size_t count, loff_t *ppos)
 {
@@ -917,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
        .write = hl_data_write32
 };
 
+static const struct file_operations hl_data64b_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_data_read64,
+       .write = hl_data_write64
+};
+
 static const struct file_operations hl_i2c_data_fops = {
        .owner = THIS_MODULE,
        .read = hl_i2c_data_read,
@@ -1030,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
                                dev_entry,
                                &hl_data32b_fops);
 
+       debugfs_create_file("data64",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_data64b_fops);
+
        debugfs_create_file("set_power_state",
                                0200,
                                dev_entry->root,
index f634e9c..0b6567b 100644 (file)
@@ -4180,6 +4180,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
        return rc;
 }
 
+static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 ddr_bar_addr;
+       int rc = 0;
+
+       if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+               u32 val_l = RREG32(addr - CFG_BASE);
+               u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
+
+               *val = (((u64) val_h) << 32) | val_l;
+
+       } else if ((addr >= SRAM_BASE_ADDR) &&
+                       (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+               *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+                               (addr - SRAM_BASE_ADDR));
+
+       } else if ((addr >= DRAM_PHYS_BASE) &&
+                  (addr <=
+                   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+               u64 bar_base_addr = DRAM_PHYS_BASE +
+                               (addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+               ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+               if (ddr_bar_addr != U64_MAX) {
+                       *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
+                                               (addr - bar_base_addr));
+
+                       ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+                                                       ddr_bar_addr);
+               }
+               if (ddr_bar_addr == U64_MAX)
+                       rc = -EIO;
+
+       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+               *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
+       } else {
+               rc = -EFAULT;
+       }
+
+       return rc;
+}
+
+static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 ddr_bar_addr;
+       int rc = 0;
+
+       if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
+               WREG32(addr - CFG_BASE, lower_32_bits(val));
+               WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
+
+       } else if ((addr >= SRAM_BASE_ADDR) &&
+                       (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
+
+               writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+                                       (addr - SRAM_BASE_ADDR));
+
+       } else if ((addr >= DRAM_PHYS_BASE) &&
+                  (addr <=
+                   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+
+               u64 bar_base_addr = DRAM_PHYS_BASE +
+                               (addr & ~(prop->dram_pci_bar_size - 0x1ull));
+
+               ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+               if (ddr_bar_addr != U64_MAX) {
+                       writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
+                                               (addr - bar_base_addr));
+
+                       ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+                                                       ddr_bar_addr);
+               }
+               if (ddr_bar_addr == U64_MAX)
+                       rc = -EIO;
+
+       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+               *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
+       } else {
+               rc = -EFAULT;
+       }
+
+       return rc;
+}
+
 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
 {
        struct goya_device *goya = hdev->asic_specific;
@@ -5186,6 +5276,8 @@ static const struct hl_asic_funcs goya_funcs = {
        .restore_phase_topology = goya_restore_phase_topology,
        .debugfs_read32 = goya_debugfs_read32,
        .debugfs_write32 = goya_debugfs_write32,
+       .debugfs_read64 = goya_debugfs_read64,
+       .debugfs_write64 = goya_debugfs_write64,
        .add_device_attr = goya_add_device_attr,
        .handle_eqe = goya_handle_eqe,
        .set_pll_profile = goya_set_pll_profile,
index 9549062..4ef8cf2 100644 (file)
@@ -582,6 +582,8 @@ struct hl_asic_funcs {
        void (*restore_phase_topology)(struct hl_device *hdev);
        int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
        int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+       int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+       int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
        void (*add_device_attr)(struct hl_device *hdev,
                                struct attribute_group *dev_attr_grp);
        void (*handle_eqe)(struct hl_device *hdev,