From 17f3f42af2bcddc38ff08b355e007f3b6d5ce70c Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 29 Sep 2022 10:21:28 +0300 Subject: [PATCH] habanalabs: handle HBM MMU when capturing page fault data In case of HBM MMU page fault, capture its relevant mappings. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e9b373a..b8b3228 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -2273,15 +2273,20 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_ num_of_engines * sizeof(u16)); hdev->captured_err_info.razwi.flags = flags; } -static void hl_capture_user_mappings(struct hl_device *hdev) +static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) { struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode; struct hl_userptr *userptr; + enum vm_type *vm_type; struct hl_ctx *ctx; u32 map_idx = 0; int i; + /* Reset previous session count*/ + pgf_info->num_of_user_mappings = 0; + ctx = hl_get_compute_ctx(hdev); if (!ctx) { dev_err(hdev->dev, "Can't get user context for user mappings\n"); @@ -2290,7 +2295,7 @@ static void hl_capture_user_mappings(struct hl_device *hdev) mutex_lock(&ctx->mem_hash_lock); hash_for_each(ctx->mem_hash, i, hnode, node) - pgf_info->num_of_user_mappings++; + pgf_info->num_of_user_mappings++; if (!pgf_info->num_of_user_mappings) goto finish; @@ -2300,17 +2305,25 @@ static void hl_capture_user_mappings(struct hl_device *hdev) */ vfree(pgf_info->user_mappings); pgf_info->user_mappings = - vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); + vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); if (!pgf_info->user_mappings) { pgf_info->num_of_user_mappings = 0; goto finish; } hash_for_each(ctx->mem_hash, i, hnode, node) { - userptr = hnode->ptr; - pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; - pgf_info->user_mappings[map_idx].size = userptr->size; - map_idx++; + vm_type = hnode->ptr; + if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { + userptr = hnode->ptr; + pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; + pgf_info->user_mappings[map_idx].size = userptr->size; + map_idx++; + } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { + phys_pg_pack = hnode->ptr; + pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; + pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; + map_idx++; + } } finish: mutex_unlock(&ctx->mem_hash_lock); @@ -2326,5 +2339,5 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get()); hdev->captured_err_info.pgf_info.pgf.addr = addr; hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id; - hl_capture_user_mappings(hdev); + hl_capture_user_mappings(hdev, is_pmmu); } -- 2.7.4