From eb10b897e45968740fde22626843177b7b2f2191 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 14 Oct 2020 15:17:36 +0300 Subject: [PATCH] habanalabs: reset device upon fw read failure failure in reading pre-boot verion is not handled correctly, upon failure we need to reset the device in order to be able to reinstall the driver. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 4 +++- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/pci.c | 5 ++++- drivers/misc/habanalabs/gaudi/gaudi.c | 27 ++++++++++++++++----------- drivers/misc/habanalabs/goya/goya.c | 22 ++++++++++++++-------- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index fb9d901..2fc12e5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -607,7 +607,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, return -EIO; } - hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + if (rc) + return rc; security_status = RREG32(cpu_security_boot_status_reg); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f00891d..0f4f8ef 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -927,7 +927,7 @@ struct hl_asic_funcs { void (*ctx_fini)(struct hl_ctx *ctx); int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); - void (*read_device_fw_version)(struct hl_device *hdev, + int (*read_device_fw_version)(struct hl_device *hdev, enum hl_fw_component fwc); int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 211f319..02152d8 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -390,8 +390,11 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, rc = hl_fw_read_preboot_status(hdev, cpu_boot_status_reg, cpu_security_boot_status_reg, boot_err0_reg, preboot_ver_timeout); - if (rc) + if (rc) { + dev_err(hdev->dev, "Failed to read preboot version\n"); + hdev->asic_funcs->hw_fini(hdev, true); goto unmap_pci_bars; + } return 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index ecfcfdf..6aa3e38 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3603,7 +3603,7 @@ static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); } -static void gaudi_read_device_fw_version(struct hl_device *hdev, +static int gaudi_read_device_fw_version(struct hl_device *hdev, enum hl_fw_component fwc) { const char *name; @@ -3623,7 +3623,7 @@ static void gaudi_read_device_fw_version(struct hl_device *hdev, break; default: dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); - return; + return -EIO; } ver_off &= ~((u32)SRAM_BASE_ADDR); @@ -3635,7 +3635,10 @@ static void gaudi_read_device_fw_version(struct hl_device *hdev, dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n", name, ver_off); strcpy(dest, "unavailable"); + return -EIO; } + + return 0; } static int gaudi_init_cpu(struct hl_device *hdev) @@ -3925,16 +3928,18 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap); - gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | - HW_CAP_HBM | HW_CAP_PCI_DMA | - HW_CAP_MME | HW_CAP_TPC_MASK | - HW_CAP_HBM_DMA | HW_CAP_PLL | - HW_CAP_NIC_MASK | HW_CAP_MMU | - HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER | - HW_CAP_CLK_GATE); + if (gaudi) { + gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | + HW_CAP_HBM | HW_CAP_PCI_DMA | + HW_CAP_MME | HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA | HW_CAP_PLL | + HW_CAP_NIC_MASK | HW_CAP_MMU | + HW_CAP_SRAM_SCRAMBLER | + HW_CAP_HBM_SCRAMBLER | + HW_CAP_CLK_GATE); - memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); + } } static int gaudi_suspend(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ab46794..a0580b8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2341,7 +2341,7 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev) * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx. * The version string should be located by that offset. */ -static void goya_read_device_fw_version(struct hl_device *hdev, +static int goya_read_device_fw_version(struct hl_device *hdev, enum hl_fw_component fwc) { const char *name; @@ -2361,7 +2361,7 @@ static void goya_read_device_fw_version(struct hl_device *hdev, break; default: dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); - return; + return -EIO; } ver_off &= ~((u32)SRAM_BASE_ADDR); @@ -2373,7 +2373,11 @@ static void goya_read_device_fw_version(struct hl_device *hdev, dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n", name, ver_off); strcpy(dest, "unavailable"); + + return -EIO; } + + return 0; } static int goya_init_cpu(struct hl_device *hdev) @@ -2644,12 +2648,14 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset) WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM, 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT); - goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | - HW_CAP_DDR_0 | HW_CAP_DDR_1 | - HW_CAP_DMA | HW_CAP_MME | - HW_CAP_MMU | HW_CAP_TPC_MBIST | - HW_CAP_GOLDEN | HW_CAP_TPC); - memset(goya->events_stat, 0, sizeof(goya->events_stat)); + if (goya) { + goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | + HW_CAP_DDR_0 | HW_CAP_DDR_1 | + HW_CAP_DMA | HW_CAP_MME | + HW_CAP_MMU | HW_CAP_TPC_MBIST | + HW_CAP_GOLDEN | HW_CAP_TPC); + memset(goya->events_stat, 0, sizeof(goya->events_stat)); + } } int goya_suspend(struct hl_device *hdev) -- 2.7.4