From 4080308e33bd6ebdb10d0ce62545690cb9be23e4 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 20 May 2021 12:45:58 +0300 Subject: [PATCH] habanalabs/gaudi: use COMMS to reset device / halt CPU This is needed because the legacy FW 'communication' protocol will soon become obsolete. Because COMMS is a boot protocol, communicating through it is supported only until Linux is loaded to the device CPU; after that, we fall back to the former implementation. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 5 +++- drivers/misc/habanalabs/gaudi/gaudi.c | 36 ++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 399d64e..c19acef 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1390,7 +1390,7 @@ static int hl_fw_dynamic_send_clear_cmd(struct hl_device *hdev, * leftovers between command * NOOP command: necessary to avoid loop on the clear command by the FW */ -static int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, +int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, struct fw_load_mgr *fw_loader, enum comms_cmd cmd, unsigned int size, bool wait_ok, u32 timeout) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 433262b..f1ff4d5 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2574,7 +2574,10 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 sts_boot_dev_sts0_reg, u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg, u32 boot_err1_reg, u32 timeout); - +int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, + struct fw_load_mgr *fw_loader, + enum comms_cmd cmd, unsigned int 
size, + bool wait_ok, u32 timeout); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4249dff..3bdf5dd 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1931,6 +1931,38 @@ static void gaudi_disable_msi(struct hl_device *hdev) gaudi->hw_cap_initialized &= ~HW_CAP_MSI; } +static void gaudi_fw_hard_reset(struct hl_device *hdev) +{ + int rc; + + if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) { + rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader, + COMMS_RST_DEV, 0, false, + hdev->fw_loader.cpu_timeout); + if (rc) + dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n"); + } else { + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV); + } +} + +static void gaudi_fw_halt_cpu(struct hl_device *hdev) +{ + int rc; + + /* Stop device CPU to make sure nothing bad happens */ + if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) { + rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader, + COMMS_GOTO_WFE, 0, true, + hdev->fw_loader.cpu_timeout); + if (rc) + dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n"); + } else { + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + msleep(GAUDI_CPU_RESET_WAIT_MSEC); + } +} + static void gaudi_init_scrambler_sram(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -4106,9 +4138,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) * stopped in any means necessary */ if (hdev->asic_prop.hard_reset_done_by_fw) - WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV); + gaudi_fw_hard_reset(hdev); else - WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + gaudi_fw_halt_cpu(hdev); if (hdev->fw_loader.linux_loaded) { irq_handler_offset = 
hdev->asic_prop.gic_interrupts_enable ? -- 2.7.4