From d155df4f628a5312a485235aa8cc5ba78e11ea65 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 23 Aug 2022 16:23:56 +0300 Subject: [PATCH] habanalabs: ignore EEPROM errors during boot EEPROM errors reported by firmware are basically warnings and should not fail the boot process. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++++ drivers/misc/habanalabs/include/common/hl_boot_if.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 12d0f18..4ede4bb 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -573,6 +573,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); /* All warnings should go here in order not to reach the unknown error validation */ + if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { + dev_warn(hdev->dev, + "Device boot warning - EEPROM failure detected, default settings applied\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL; + } + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index f2f6488..2e45be5 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -34,6 +34,7 @@ enum cpu_boot_err { CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_TPM_FAIL = 20, CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -115,6 +116,9 @@ enum cpu_boot_err { * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature * sensor. * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -139,6 +143,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) -- 2.7.4