From 6c31f494d8a9cf7e6081f94717a46ce789da6bc6 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 17 Jun 2021 09:52:55 +0300 Subject: [PATCH] habanalabs/gaudi: add support for NIC DERR We add support for NIC DERR ECC error events. If such an error is received, a device reset will be performed. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 1 + .../misc/habanalabs/include/gaudi/gaudi_async_events.h | 5 +++++ .../include/gaudi/gaudi_async_ids_map_extended.h | 15 ++++++++++----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4a75df2..82d5613 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7870,6 +7870,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: case GAUDI_EVENT_MMU_DERR: + case GAUDI_EVENT_NIC0_CS_DBG_DERR ... 
GAUDI_EVENT_NIC4_CS_DBG_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); goto reset_device; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index 2aee18e..d966bd4 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -252,6 +252,11 @@ enum gaudi_async_event_id { GAUDI_EVENT_HBM3_SPI_0 = 407, GAUDI_EVENT_HBM3_SPI_1 = 408, GAUDI_EVENT_PSOC_GPIO_U16_0 = 421, + GAUDI_EVENT_NIC0_CS_DBG_DERR = 483, + GAUDI_EVENT_NIC1_CS_DBG_DERR = 487, + GAUDI_EVENT_NIC2_CS_DBG_DERR = 491, + GAUDI_EVENT_NIC3_CS_DBG_DERR = 495, + GAUDI_EVENT_NIC4_CS_DBG_DERR = 499, GAUDI_EVENT_RAZWI_OR_ADC = 548, GAUDI_EVENT_TPC0_QM = 572, GAUDI_EVENT_TPC1_QM = 573, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index ac4d4b5..479b6b0 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -507,23 +507,28 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 480, .cpu_id = 329, .valid = 0, .name = "" }, { .fc_id = 481, .cpu_id = 330, .valid = 0, .name = "" }, { .fc_id = 482, .cpu_id = 331, .valid = 0, .name = "" }, - { .fc_id = 483, .cpu_id = 332, .valid = 0, .name = "" }, + { .fc_id = 483, .cpu_id = 332, .valid = 1, + .name = "NIC0_CS_DBG_DERR" }, { .fc_id = 484, .cpu_id = 333, .valid = 0, .name = "" }, { .fc_id = 485, .cpu_id = 334, .valid = 0, .name = "" }, { .fc_id = 486, .cpu_id = 335, .valid = 0, .name = "" }, - { .fc_id = 487, .cpu_id = 336, .valid = 0, .name = "" }, + { .fc_id = 487, .cpu_id = 336, .valid = 1, + .name = "NIC1_CS_DBG_DERR" }, { .fc_id = 488, .cpu_id = 337, .valid = 0, .name = "" }, { .fc_id = 489, .cpu_id = 338, 
.valid = 0, .name = "" }, { .fc_id = 490, .cpu_id = 339, .valid = 0, .name = "" }, - { .fc_id = 491, .cpu_id = 340, .valid = 0, .name = "" }, + { .fc_id = 491, .cpu_id = 340, .valid = 1, + .name = "NIC2_CS_DBG_DERR" }, { .fc_id = 492, .cpu_id = 341, .valid = 0, .name = "" }, { .fc_id = 493, .cpu_id = 342, .valid = 0, .name = "" }, { .fc_id = 494, .cpu_id = 343, .valid = 0, .name = "" }, - { .fc_id = 495, .cpu_id = 344, .valid = 0, .name = "" }, + { .fc_id = 495, .cpu_id = 344, .valid = 1, + .name = "NIC3_CS_DBG_DERR" }, { .fc_id = 496, .cpu_id = 345, .valid = 0, .name = "" }, { .fc_id = 497, .cpu_id = 346, .valid = 0, .name = "" }, { .fc_id = 498, .cpu_id = 347, .valid = 0, .name = "" }, - { .fc_id = 499, .cpu_id = 348, .valid = 0, .name = "" }, + { .fc_id = 499, .cpu_id = 348, .valid = 1, + .name = "NIC4_CS_DBG_DERR" }, { .fc_id = 500, .cpu_id = 349, .valid = 0, .name = "" }, { .fc_id = 501, .cpu_id = 350, .valid = 0, .name = "" }, { .fc_id = 502, .cpu_id = 351, .valid = 0, .name = "" }, -- 2.7.4