From aff6354afd1f9eae1e10658c157c26e316806f56 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 31 Oct 2022 11:44:45 +0200 Subject: [PATCH] habanalabs/gaudi: add page fault notify event Each time page fault happens, besides capturing its data, also notify the user about it. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 6 +++--- include/uapi/misc/habanalabs.h | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 65bb40f..3181812 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2490,3 +2490,12 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id; hl_capture_user_mappings(hdev, is_pmmu); } + +void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, + u64 *event_mask) +{ + hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); + + if (event_mask) + *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d9335f3..0781b86 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3815,6 +3815,8 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, u8 flags, u64 *event_mask); void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu); +void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, + u64 *event_mask); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 035865c..cbe1daf 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6740,7 +6740,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_i } } -static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr) +static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) { struct gaudi_device *gaudi = hdev->asic_specific; u32 val; @@ -6755,7 +6755,7 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); - hl_capture_page_fault(hdev, *addr, 0, true); + hl_handle_page_fault(hdev, *addr, 0, true, event_mask); WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); } @@ -7323,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, if (razwi) { gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, &is_write); - gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr); + gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); if (is_read) razwi_flags |= HL_RAZWI_READ; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7747e19..e50cb71 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -722,6 +722,7 @@ enum hl_server_type { * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened + * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) @@ -731,6 +732,7 @@ enum hl_server_type { #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) #define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) +#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) /* Opcode for management ioctl * -- 2.7.4