accel/habanalabs: unmap mapped memory when TLB inv fails
Author:    Koby Elbaz <kelbaz@habana.ai>
Date:      Wed, 8 Mar 2023 15:53:39 +0000 (17:53 +0200)
Committer: Oded Gabbay <ogabbay@kernel.org>
Date:      Sat, 8 Apr 2023 07:39:33 +0000 (10:39 +0300)
Once a memory mapping is added to the page tables, it is followed by
a TLB invalidation request, which can fail (e.g., due to a HW failure).
In that case, the newly created mapping must be removed as part of the
failure handling. In addition, the TLB invalidation failure prints were
updated to be more accurate.
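
For illustration, here is a minimal, self-contained userspace C sketch
of the goto-unwind idiom the fix applies (the resource helpers are
hypothetical stand-ins, not the habanalabs API; the real patch follows
below):

#include <stdio.h>

/* Hypothetical stand-ins for the mapping and cache-flush steps */
static int map_resource(void)    { return 0; }   /* mapping succeeds */
static int flush_caches(void)    { return -5; }  /* simulated HW failure (-EIO) */
static void unmap_resource(void) { puts("rollback: mapping removed"); }
static void free_va_range(void)  { puts("rollback: VA range freed"); }

static int setup(void)
{
	int rc;

	rc = map_resource();
	if (rc)
		goto err_free_va;	/* nothing was mapped yet */

	rc = flush_caches();		/* the step that can fail in HW */
	if (rc)
		goto err_unmap;		/* mapping exists but is stale - undo it */

	return 0;

err_unmap:
	unmap_resource();
err_free_va:
	free_va_range();
	return rc;
}

int main(void)
{
	printf("setup() = %d\n", setup());
	return 0;
}

The cleanup labels run in reverse order of acquisition, so a failure at
any stage releases exactly what was set up before it.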

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
drivers/accel/habanalabs/common/command_buffer.c
drivers/accel/habanalabs/common/mmu/mmu.c

diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c
index 3a0535a..6e09f48 100644
--- a/drivers/accel/habanalabs/common/command_buffer.c
+++ b/drivers/accel/habanalabs/common/command_buffer.c
@@ -45,20 +45,29 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
        }
 
        mutex_lock(&hdev->mmu_lock);
+
        rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
        if (rc) {
                dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
-               goto err_va_umap;
+               goto err_va_pool_free;
        }
+
        rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
+       if (rc)
+               goto err_mmu_unmap;
+
        mutex_unlock(&hdev->mmu_lock);
 
        cb->is_mmu_mapped = true;
-       return rc;
 
-err_va_umap:
+       return 0;
+
+err_mmu_unmap:
+       hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
+err_va_pool_free:
        mutex_unlock(&hdev->mmu_lock);
        gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
+
        return rc;
 }
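
Note the ordering in the unwind path: hl_mmu_unmap_contiguous() runs
under hdev->mmu_lock, matching the map call it reverses, while
gen_pool_free() happens only after the unlock, mirroring the VA
allocation that was presumably done before the lock was taken.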
 
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index 17581b1..f379e5b 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -679,7 +679,9 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
 
        rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
        if (rc)
-               dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n");
+               dev_err_ratelimited(hdev->dev,
+                               "%s cache invalidation failed, rc=%d\n",
+                               flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc);
 
        return rc;
 }
@@ -692,7 +694,9 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
        rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags,
                                                                asid, va, size);
        if (rc)
-               dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n");
+               dev_err_ratelimited(hdev->dev,
+                               "%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
+                               flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc);
 
        return rc;
 }
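
Both prints derive the MMU name from the flags with the same ternary; a
hypothetical helper (not part of this patch) could centralize that
choice:

/* Hypothetical helper, not in the patch: name the MMU from the flags */
static const char *mmu_name(u32 flags)
{
	return flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU";
}

The two dev_err_ratelimited() calls would then pass mmu_name(flags)
instead of repeating the conditional.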