From 5d8a5f29658716348c625836a6bfdad6929db224 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 9 Mar 2023 11:27:26 +0200 Subject: [PATCH] accel/habanalabs: in {e/p}dma_core events read the err cause reg Since the err_cause register is unprivileged, we should read it from the driver instead of using the param that came from the FW. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 40 ++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 224eaaf..94d53cd 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -8689,14 +8689,13 @@ static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, return error_count; } -static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, - u64 intr_cause_data) +static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr) { - u32 error_count = 0; + u32 error_count = 0, sts_val = RREG32(sts_addr); int i; for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) - if (intr_cause_data & BIT(i)) { + if (sts_val & BIT(i)) { gaudi2_print_event(hdev, event_type, true, "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); error_count++; @@ -8707,6 +8706,27 @@ static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, return error_count; } +static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx) +{ + u32 sts_addr; + + sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET; + return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr); +} + +static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx) +{ + static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5}; + u32 sts_addr, index; + + index = edma_event_index_map[edma_idx]; + + sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE + + DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) + + DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE); + return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr); +} + static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) { u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; @@ -9524,9 +9544,15 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; - case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: - error_count = gaudi2_handle_dma_core_event(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE: + index = event_type - GAUDI2_EVENT_HDMA2_CORE; + error_count = gaudi2_handle_edma_core_event(hdev, event_type, index); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + break; + + case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE: + index = event_type - GAUDI2_EVENT_PDMA0_CORE; + error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; -- 2.7.4