From: Ohad Sharabi Date: Mon, 21 Mar 2022 12:39:19 +0000 (+0200) Subject: habanalabs: add MMU prefetch to ASIC-specific code X-Git-Tag: v6.6.17~7345^2~39 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=acbabe63efa78012eb3702486e1cfe7b485f70a0;p=platform%2Fkernel%2Flinux-rpi.git habanalabs: add MMU prefetch to ASIC-specific code This is necessary pre-requisite for future ASIC support, where MMU TLB prefetch is supported. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Greg Kroah-Hartman --- diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index a6c6d9f..1f7758f 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1283,8 +1283,8 @@ struct fw_load_mgr { * @write_pte: write MMU page table entry to DRAM. * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft * (L1 only) or hard (L0 & L1) flush. - * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with - * ASID-VA-size mask. + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask. + * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask. * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. @@ -1416,6 +1416,8 @@ struct hl_asic_funcs { u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); + int (*mmu_prefetch_cache_range)(struct hl_device *hdev, u32 flags, u32 asid, u64 va, + u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, @@ -3143,6 +3145,7 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); +int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size); u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte); u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, u8 hop_idx, u64 hop_addr, u64 virt_addr); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 4d44c34e..a438417 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1241,20 +1241,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { - mutex_unlock(&ctx->mmu_lock); - dev_err(hdev->dev, "mapping page pack failed for handle %u\n", - handle); + dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); + if (rc) + goto map_err; - mutex_unlock(&ctx->mmu_lock); - + /* already prefetch the relevant translations to the cache */ + rc = hl_mmu_prefetch_cache_range(hdev, *vm_type, ctx->asid, ret_vaddr, + phys_pg_pack->total_size); if (rc) goto map_err; + mutex_unlock(&ctx->mmu_lock); + ret_vaddr += phys_pg_pack->offset; hnode->ptr = vm_type; @@ -1272,6 +1275,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; map_err: + mutex_unlock(&ctx->mmu_lock); + if (add_va_block(hdev, va_range, ret_vaddr, ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 27c9088..dcd59d7 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -665,6 +665,17 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, return rc; } +int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size) +{ + int rc; + + rc = hdev->asic_funcs->mmu_prefetch_cache_range(hdev, flags, asid, va, size); + if (rc) + dev_err_ratelimited(hdev->dev, "MMU cache range prefetch failed\n"); + + return rc; +} + u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 66d56ef..68c066e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -9377,6 +9377,12 @@ static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_si info->page_order_bitmask = 0; } +static int gaudi_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, + u64 size) +{ + return 0; +} + static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); @@ -9444,6 +9450,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .write_pte = gaudi_write_pte, .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, + .mmu_prefetch_cache_range = gaudi_mmu_prefetch_cache_range, .send_heartbeat = gaudi_send_heartbeat, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0bc2b07..bc8431e 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5680,6 +5680,12 @@ static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_siz info->page_order_bitmask = 0; } +static int goya_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, + u64 size) +{ + return 0; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5724,6 +5730,7 @@ static const struct hl_asic_funcs goya_funcs = { .write_pte = goya_write_pte, .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, + .mmu_prefetch_cache_range = goya_mmu_prefetch_cache_range, .send_heartbeat = goya_send_heartbeat, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle,