habanalabs: handle device TPM boot error as warning
author Ofir Bitton <obitton@habana.ai>
Wed, 10 Nov 2021 09:41:43 +0000 (11:41 +0200)
committer Oded Gabbay <ogabbay@kernel.org>
Sun, 26 Dec 2021 06:59:05 +0000 (08:59 +0200)
As the TPM error indication is not fatal, the driver should print a warning
and continue booting.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/include/common/hl_boot_if.h

index 70e992b..aea5904 100644 (file)
@@ -529,6 +529,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
                err_exists = true;
        }
 
+       if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
+               dev_warn(hdev->dev,
+                       "Device boot warning - TPM failure\n");
+               /* This is a warning so we don't want it to disable the
+                * device
+                */
+               err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
+       }
+
        /* return error only if it's in the predefined mask */
        if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
                                lower_32_bits(hdev->boot_error_status_mask)))
index 2626df6..135e21d 100644 (file)
@@ -32,6 +32,7 @@ enum cpu_boot_err {
        CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
        CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
        CPU_BOOT_ERR_BINNING_FAIL = 19,
+       CPU_BOOT_ERR_TPM_FAIL = 20,
        CPU_BOOT_ERR_ENABLED = 31,
        CPU_BOOT_ERR_SCND_EN = 63,
        CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
@@ -108,6 +109,8 @@ enum cpu_boot_err {
  *                                     malfunctioning components might still be
  *                                     in use.
  *
+ * CPU_BOOT_ERR0_TPM_FAIL              TPM verification flow failed.
+ *
  * CPU_BOOT_ERR0_ENABLED               Error registers enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the error
@@ -130,6 +133,7 @@ enum cpu_boot_err {
 #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL     (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
 #define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR         (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
 #define CPU_BOOT_ERR0_BINNING_FAIL             (1 << CPU_BOOT_ERR_BINNING_FAIL)
+#define CPU_BOOT_ERR0_TPM_FAIL                 (1 << CPU_BOOT_ERR_TPM_FAIL)
 #define CPU_BOOT_ERR0_ENABLED                  (1 << CPU_BOOT_ERR_ENABLED)
 #define CPU_BOOT_ERR1_ENABLED                  (1 << CPU_BOOT_ERR_ENABLED)