habanalabs: preboot hard reset support
authorOfir Bitton <obitton@habana.ai>
Tue, 1 Dec 2020 08:39:54 +0000 (10:39 +0200)
committerOded Gabbay <ogabbay@kernel.org>
Mon, 28 Dec 2020 06:47:38 +0000 (08:47 +0200)
FW hard reset capability indication is now moved to preboot stage.
Driver will check if HW is dirty only after it validated preboot
is up. If HW is dirty, driver will perform a hard reset according
to the FW capability.
In addition, FW defines a new message which driver need to send in
order to initiate a hard reset.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/include/common/hl_boot_if.h

index 0e1c629..c970bfc 100644 (file)
@@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
        security_status = RREG32(cpu_security_boot_status_reg);
 
        /* We read security status multiple times during boot:
-        * 1. preboot - we check if fw security feature is supported
+        * 1. preboot - a. Check whether the security status bits are valid
+        *              b. Check whether fw security is enabled
+        *              c. Check whether hard reset is done by fw
         * 2. boot cpu - we get boot cpu security status
         * 3. FW application - we get FW application security status
         *
@@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
         */
        if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
                hdev->asic_prop.fw_security_status_valid = 1;
-               prop->fw_security_disabled =
-                       !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+
+               if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN))
+                       prop->fw_security_disabled = true;
+
+               if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+                       hdev->asic_prop.hard_reset_done_by_fw = true;
        } else {
                hdev->asic_prop.fw_security_status_valid = 0;
                prop->fw_security_disabled = true;
        }
 
+       dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
+               hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+
        dev_info(hdev->dev, "firmware-level security is %s\n",
                prop->fw_security_disabled ? "disabled" : "enabled");
 
@@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
        }
 
        /* Read FW application security bits */
-       if (hdev->asic_prop.fw_security_status_valid) {
+       if (hdev->asic_prop.fw_security_status_valid)
                hdev->asic_prop.fw_app_security_map =
                                RREG32(cpu_security_boot_status_reg);
 
-               if (hdev->asic_prop.fw_app_security_map &
-                               CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
-                       hdev->asic_prop.hard_reset_done_by_fw = true;
-       }
-
-       dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
-               hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
-
        dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
 out:
index 278c4de..e465c15 100644 (file)
@@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
        if (rc)
                goto free_queue_props;
 
-       if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-               dev_info(hdev->dev,
-                       "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
-       }
-
        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
@@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
                goto pci_fini;
        }
 
+       if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+               dev_info(hdev->dev,
+                       "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->hw_fini(hdev, true);
+       }
+
        return 0;
 
 pci_fini:
@@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
        /* I don't know what is the state of the CPU so make sure it is
         * stopped in any means necessary
         */
-       WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+       if (hdev->asic_prop.hard_reset_done_by_fw)
+               WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+       else
+               WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 
        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
 
index b66fd55..d61177b 100644 (file)
@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
        if (rc)
                goto free_queue_props;
 
-       if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-               dev_info(hdev->dev,
-                       "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
-       }
-
        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
                goto pci_fini;
        }
 
+       if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+               dev_info(hdev->dev,
+                       "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->hw_fini(hdev, true);
+       }
+
        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
index e5801ec..755c480 100644 (file)
@@ -204,6 +204,8 @@ enum kmd_msg {
        KMD_MSG_GOTO_WFE,
        KMD_MSG_FIT_RDY,
        KMD_MSG_SKIP_BMC,
+       RESERVED,
+       KMD_MSG_RST_DEV,
 };
 
 enum cpu_msg_status {