1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2020-2023 Intel Corporation
6 #include <linux/bitfield.h>
7 #include <linux/highmem.h>
12 #include "ivpu_mmu_context.h"
/*
 * Bit fields of a VPU virtual address that select the entry at each of the
 * four page-table levels (PGD -> PUD -> PMD -> PTE), 9 bits per level.
 * Bits 11..0 are below the lowest index field (offset within a 4 KB page).
 */
14 #define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39)
15 #define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30)
16 #define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
17 #define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
/*
 * Flag bits stored in a page-table entry. IVPU_MMU_ENTRY_FLAGS_MASK covers
 * bit 52 plus bits 11..0; it is masked off when an entry is turned back into
 * a plain DMA address.
 */
18 #define IVPU_MMU_ENTRY_FLAGS_MASK (BIT(52) | GENMASK(11, 0))
19 #define IVPU_MMU_ENTRY_FLAG_CONT BIT(52)
20 #define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
21 #define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
22 #define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
23 #define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
24 #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
25 #define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
/*
 * Sizes. A page is 4 KB and a "contiguous" group is 16 pages (64 KB).
 * Each *_MAP_SIZE is the address span covered by one full table at that
 * level: IVPU_MMU_PGTABLE_ENTRIES times the span of the level below.
 * IVPU_MMU_PGTABLE_ENTRIES is not defined in this file.
 * IVPU_MMU_PGTABLE_SIZE is the byte size of one table of u64 entries.
 */
27 #define IVPU_MMU_PAGE_SIZE SZ_4K
28 #define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16)
29 #define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
30 #define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
31 #define IVPU_MMU_PUD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE)
32 #define IVPU_MMU_PGD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE)
33 #define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
/*
 * Entry templates:
 *  - IVPU_MMU_ENTRY_VALID:   OR-ed into the DMA address of a next-level table.
 *  - IVPU_MMU_ENTRY_INVALID: dummy address with all flag bits cleared; written
 *                            into a PTE when a page is unmapped.
 *  - IVPU_MMU_ENTRY_MAPPED:  base protection bits used for mapped pages.
 */
35 #define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
36 #define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
37 #define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
38 #define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
39 IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
/*
 * Allocate the top-level (PGD) table of @pgtable with dma_alloc_coherent()
 * and record its CPU pointer in pgd_dma_ptr and its DMA address in pgd_dma.
 * The allocation result is checked for NULL before pgd_dma is stored.
 */
41 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
45 pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
47 if (!pgtable->pgd_dma_ptr)
50 pgtable->pgd_dma = pgd_dma;
/*
 * Release one page table of IVPU_MMU_PGTABLE_SIZE bytes with
 * dma_free_coherent(). @dma_addr may be a raw table entry: the entry flag
 * bits are masked off before the address is handed to the DMA API.
 */
55 static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
58 dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
59 dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
/*
 * Tear down the whole page-table tree of @pgtable.
 *
 * For every level the CPU pointer of a child table comes from the shadow
 * pointer arrays (pud_ptrs / pmd_ptrs / pte_ptrs), while its DMA address is
 * read back from the corresponding entry in the parent table. Children are
 * released before their parent, and the PGD itself is released last.
 */
62 static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
64 int pgd_idx, pud_idx, pmd_idx;
65 dma_addr_t pud_dma, pmd_dma, pte_dma;
66 u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr;
68 for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) {
/* PUD table: CPU pointer from the shadow array, DMA entry from the PGD */
69 pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
70 pud_dma = pgtable->pgd_dma_ptr[pgd_idx];
75 for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) {
/* PMD table: CPU pointer from the shadow array, DMA entry from the PUD */
76 pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
77 pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx];
82 for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) {
/* PTE table: CPU pointer from the shadow array, DMA entry from the PMD */
83 pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
84 pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
86 ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
/* All PTE tables under this PMD are gone; drop the pointer array, then the PMD */
89 kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
90 ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
/* Drop the per-PGD-entry shadow arrays, then the PUD table itself */
93 kfree(pgtable->pmd_ptrs[pgd_idx]);
94 kfree(pgtable->pte_ptrs[pgd_idx]);
95 ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
/* Finally release the top-level table */
98 ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
/*
 * Provide the PUD (second-level) table for PGD slot @pgd_idx.
 *
 * Starts from the existing shadow pointer pud_ptrs[pgd_idx]; a new table is
 * obtained with dma_alloc_wc(), together with zeroed shadow arrays for the
 * PMD and PTE pointers below this slot. On success the shadow pointer is
 * stored and the PGD entry is set to the table's DMA address OR-ed with
 * IVPU_MMU_ENTRY_VALID. The caller treats a NULL result as failure.
 *
 * NOTE(review): the table is allocated with dma_alloc_wc() but released via
 * ivpu_mmu_pgtable_free(), which calls dma_free_coherent() - confirm that this
 * alloc/free pairing is intended (dma_free_wc() is the documented pair).
 */
102 ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx)
104 u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
110 pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
/* The shadow arrays for this slot are expected to be unset at this point */
114 drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]);
115 pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
116 if (!pgtable->pmd_ptrs[pgd_idx])
117 goto err_free_pud_dma_ptr;
119 drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]);
120 pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
121 if (!pgtable->pte_ptrs[pgd_idx])
122 goto err_free_pmd_ptrs;
/* Publish: shadow CPU pointer first, then the entry in the PGD */
124 pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr;
125 pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID;
/*
 * NOTE(review): pmd_ptrs[pgd_idx] is not visibly reset to NULL after this
 * kfree(); a later retry for the same slot would then trip the drm_WARN_ON()
 * above on a stale pointer - confirm.
 */
130 kfree(pgtable->pmd_ptrs[pgd_idx]);
132 err_free_pud_dma_ptr:
133 ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
/*
 * Provide the PMD (third-level) table for slot [@pgd_idx][pud_idx].
 *
 * Starts from the existing shadow pointer; a new table is obtained with
 * dma_alloc_wc() together with a zeroed shadow array for the PTE pointers
 * below it. On success the shadow pointer is stored and the PUD entry is set
 * to the table's DMA address OR-ed with IVPU_MMU_ENTRY_VALID. The caller
 * treats a NULL result as failure.
 *
 * NOTE(review): allocated with dma_alloc_wc() but released through
 * ivpu_mmu_pgtable_free() -> dma_free_coherent(); confirm the pairing.
 */
138 ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx,
141 u64 *pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
147 pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
/* The PTE shadow array for this slot is expected to be unset at this point */
151 drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]);
152 pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
153 if (!pgtable->pte_ptrs[pgd_idx][pud_idx])
154 goto err_free_pmd_dma_ptr;
/* Publish: shadow CPU pointer first, then the entry in the PUD */
156 pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr;
157 pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID;
161 err_free_pmd_dma_ptr:
162 ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
/*
 * Provide the PTE (fourth-level) table for slot [@pgd_idx][@pud_idx][@pmd_idx].
 *
 * Starts from the existing shadow pointer; a new table is obtained with
 * dma_alloc_wc(). The shadow pointer is then stored and the PMD entry is set
 * to the table's DMA address OR-ed with IVPU_MMU_ENTRY_VALID. The caller
 * indexes the returned pointer with the PTE index.
 */
167 ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
168 int pgd_idx, int pud_idx, int pmd_idx)
170 u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
176 pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
180 pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr;
181 pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID;
/*
 * Map a single page: make the PTE for @vpu_addr in @ctx point at @dma_addr
 * with protection/flag bits @prot.
 *
 * The four table indices are extracted from @vpu_addr, each intermediate
 * level is obtained through the ivpu_mmu_ensure_*() helpers, and the final
 * entry is written as (dma_addr | prot). No cache flush or TLB invalidation
 * is visible in this function.
 */
187 ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
188 u64 vpu_addr, dma_addr_t dma_addr, u64 prot)
191 int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
192 int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
193 int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
194 int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
196 /* Allocate PUD - second level page table if needed */
197 if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
200 /* Allocate PMD - third level page table if needed */
201 if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx))
204 /* Allocate PTE - fourth level page table if needed */
205 pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_idx, pud_idx, pmd_idx);
/* Install the translation: target address combined with the flag bits */
210 pte[pte_idx] = dma_addr | prot;
/*
 * Map one 64 KB contiguous group (IVPU_MMU_CONT_PAGES_SIZE) starting at
 * @vpu_addr -> @dma_addr.
 *
 * Both addresses are expected to be 64 KB aligned (a warning is raised
 * otherwise). IVPU_MMU_ENTRY_FLAG_CONT is added to @prot and the range is
 * mapped one 4 KB page at a time through ivpu_mmu_context_map_page().
 */
216 ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
217 dma_addr_t dma_addr, u64 prot)
219 size_t size = IVPU_MMU_CONT_PAGES_SIZE;
221 drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size));
222 drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size));
/* Every PTE of the group carries the contiguous hint */
224 prot |= IVPU_MMU_ENTRY_FLAG_CONT;
227 int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
/* Advance to the next 4 KB page of the group */
232 size -= IVPU_MMU_PAGE_SIZE;
233 vpu_addr += IVPU_MMU_PAGE_SIZE;
234 dma_addr += IVPU_MMU_PAGE_SIZE;
/*
 * Unmap the single page at @vpu_addr by overwriting its PTE with
 * IVPU_MMU_ENTRY_INVALID.
 *
 * The PTE is reached through the pte_ptrs shadow array with no NULL checks
 * visible here, so the tables for @vpu_addr are assumed to exist already -
 * TODO confirm against callers.
 */
240 static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
242 int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
243 int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
244 int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
245 int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
247 /* Update PTE with dummy physical address and clear flags */
248 ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
/*
 * Flush the CPU cache lines (clflush_cache_range) of every page table that
 * takes part in translating [@vpu_addr, @vpu_addr + @size).
 *
 * The start is rounded down to a PTE-table boundary and the range is walked
 * in IVPU_MMU_PTE_MAP_SIZE steps: each PTE table is flushed, then the PMD
 * table once its range is finished, then the PUD table, and finally the PGD.
 */
252 ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
254 struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
255 u64 end_addr = vpu_addr + size;
257 /* Align to PMD entry (2 MB) */
258 vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
260 while (vpu_addr < end_addr) {
261 int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
/* End of the address span covered by this PGD entry */
262 u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
264 while (vpu_addr < end_addr && vpu_addr < pud_end) {
265 int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
/*
 * NOTE(review): pmd_end is derived from pud_idx alone, without adding the
 * base address of the enclosing PGD entry; this looks correct only while
 * pgd_idx == 0 - confirm.
 */
266 u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
268 while (vpu_addr < end_addr && vpu_addr < pmd_end) {
269 int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
/* Flush one whole PTE table, then step to the next PMD entry's range */
271 clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
272 IVPU_MMU_PGTABLE_SIZE);
273 vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
/* Flush the PMD table whose entries point at the PTE tables above */
275 clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
276 IVPU_MMU_PGTABLE_SIZE);
/* Flush the PUD table for this PGD entry */
278 clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
/* Flush the top-level table last */
280 clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
/*
 * Map @size bytes starting at @vpu_addr -> @dma_addr with flags @prot.
 *
 * For each step a 64 KB contiguous mapping is used when contiguous pages are
 * not disabled (ivpu_disable_mmu_cont_pages), at least 64 KB remain, and both
 * addresses are 64 KB aligned; otherwise a single 4 KB page is mapped. Both
 * addresses then advance by the amount just mapped.
 */
284 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
285 u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
/* OR-ing the two addresses lets one IS_ALIGNED() test check both at once */
291 if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE &&
292 IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) {
293 ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot);
294 map_size = IVPU_MMU_CONT_PAGES_SIZE;
296 ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
297 map_size = IVPU_MMU_PAGE_SIZE;
303 vpu_addr += map_size;
304 dma_addr += map_size;
/*
 * Unmap @size bytes starting at @vpu_addr, one 4 KB page at a time, through
 * ivpu_mmu_context_unmap_page().
 */
311 static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
314 ivpu_mmu_context_unmap_page(ctx, vpu_addr);
315 vpu_addr += IVPU_MMU_PAGE_SIZE;
316 size -= IVPU_MMU_PAGE_SIZE;
/*
 * Map all DMA segments of @sgt into @ctx starting at @vpu_addr.
 *
 * @vpu_addr is checked for 4 KB alignment and for lying within
 * [SZ_2G, SZ_8G]. Protection starts from IVPU_MMU_ENTRY_MAPPED, with
 * IVPU_MMU_ENTRY_FLAG_LLC_COHERENT added for LLC-coherent buffers
 * (presumably when @llc_coherent is set - the condition is not visible here).
 * Mapping and page-table cache flushing happen under ctx->lock; the TLB for
 * ctx->id is invalidated after the lock is dropped.
 *
 * NOTE(review): on a mapping failure the lock is released, but no rollback of
 * segments mapped by earlier iterations is visible - confirm whether stale
 * entries can be left behind.
 */
321 ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
322 u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
324 struct scatterlist *sg;
329 if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
332 * VPU is only 32 bit, but DMA engine is 38 bit
333 * Ranges < 2 GB are reserved for VPU internal registers
334 * Limit range to 8 GB
336 if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
339 prot = IVPU_MMU_ENTRY_MAPPED;
341 prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
343 mutex_lock(&ctx->lock);
345 for_each_sgtable_dma_sg(sgt, sg, i) {
/* Extend the segment backwards by sg->offset: start earlier, length grows */
346 dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
347 size_t size = sg_dma_len(sg) + sg->offset;
349 ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
351 ivpu_err(vdev, "Failed to map context pages\n");
352 mutex_unlock(&ctx->lock);
/* Flush the tables just written for this segment out of the CPU caches */
355 ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
359 mutex_unlock(&ctx->lock);
361 ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
363 ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
/*
 * Unmap all DMA segments of @sgt from @ctx starting at @vpu_addr.
 *
 * An unaligned @vpu_addr only produces a warning. Under ctx->lock each
 * segment's PTEs are invalidated and the affected page tables are flushed
 * from the CPU caches; afterwards the TLB for ctx->id is invalidated, with a
 * warning logged if that fails.
 */
368 ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
369 u64 vpu_addr, struct sg_table *sgt)
371 struct scatterlist *sg;
375 if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
376 ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
378 mutex_lock(&ctx->lock);
380 for_each_sgtable_dma_sg(sgt, sg, i) {
/* Same length computation as the map path: DMA length plus sg->offset */
381 size_t size = sg_dma_len(sg) + sg->offset;
383 ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
384 ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
388 mutex_unlock(&ctx->lock);
390 ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
392 ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
/*
 * Reserve @size bytes of VPU address space within @range for @node in the
 * context's drm_mm allocator. Must be called with ctx->lock held.
 *
 * When contiguous pages are enabled and @size is at least 64 KB, a 64 KB
 * aligned placement is tried first so the range can later use contiguous
 * mappings; a zero result from that attempt is detected by the `!` test
 * (presumably returning early on success - the branch body is not visible).
 * Otherwise the node is inserted with 4 KB alignment.
 */
396 ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
397 const struct ivpu_addr_range *range,
398 u64 size, struct drm_mm_node *node)
400 lockdep_assert_held(&ctx->lock);
402 if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
403 if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
404 range->start, range->end, DRM_MM_INSERT_BEST))
/* Page-aligned placement */
408 return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
409 range->start, range->end, DRM_MM_INSERT_BEST);
/*
 * Return @node's address range to the drm_mm allocator.
 * Must be called with ctx->lock held.
 */
413 ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
415 lockdep_assert_held(&ctx->lock);
417 drm_mm_remove_node(node);
/*
 * Initialise @ctx: its lock, its buffer-object list, its top-level page
 * table, and its drm_mm address-space allocator, then record @context_id.
 *
 * The allocator spans one of two address windows taken from vdev->hw->ranges:
 * global.start..shave.end or user.start..dma.end. The selecting condition is
 * not visible here (presumably global vs. user context - confirm).
 */
421 ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
426 mutex_init(&ctx->lock);
427 INIT_LIST_HEAD(&ctx->bo_list);
429 ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
434 start = vdev->hw->ranges.global.start;
435 end = vdev->hw->ranges.shave.end;
437 start = vdev->hw->ranges.user.start;
438 end = vdev->hw->ranges.dma.end;
/* drm_mm_init() takes a start and a size, hence end - start */
441 drm_mm_init(&ctx->mm, start, end - start);
442 ctx->id = context_id;
/*
 * Tear down @ctx: destroy its lock, free the whole page-table tree, and take
 * down the drm_mm allocator. A context without a PGD triggers a warning.
 * The PGD pointer and DMA address are cleared afterwards, so that a repeated
 * call hits the warning check instead of freeing the tables twice.
 */
447 static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
449 if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
452 mutex_destroy(&ctx->lock);
453 ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
454 drm_mm_takedown(&ctx->mm);
456 ctx->pgtable.pgd_dma_ptr = NULL;
457 ctx->pgtable.pgd_dma = 0;
/*
 * Initialise the device-wide context vdev->gctx with the ID
 * IVPU_GLOBAL_CONTEXT_MMU_SSID. Returns the result of ivpu_mmu_context_init().
 */
460 int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
462 return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
/*
 * Tear down the device-wide context vdev->gctx.
 *
 * NOTE(review): this is a void function that uses `return <void expr>;`.
 * ISO C does not allow a return with an expression in a void function
 * (compilers accept it as an extension); consider dropping the `return`.
 */
465 void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
467 return ivpu_mmu_context_fini(vdev, &vdev->gctx);
/*
 * Flag the user context registered under @ssid as having MMU faults.
 *
 * The per-file private data is looked up in vdev->context_xa and its
 * has_mmu_faults field is set; lookup and update both happen under the
 * xarray lock. Any guard on the lookup result is not visible here.
 */
470 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
472 struct ivpu_file_priv *file_priv;
474 xa_lock(&vdev->context_xa);
476 file_priv = xa_load(&vdev->context_xa, ssid);
478 file_priv->has_mmu_faults = true;
480 xa_unlock(&vdev->context_xa);
/*
 * Initialise a user context @ctx with ID @ctx_id and register its page table
 * with the MMU via ivpu_mmu_set_pgtable().
 *
 * A zero @ctx_id triggers a warning. If registering the page table fails, the
 * freshly initialised context is torn down again through the
 * err_context_fini path.
 */
483 int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
487 drm_WARN_ON(&vdev->drm, !ctx_id);
489 ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
491 ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
495 ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
497 ivpu_err(vdev, "Failed to set page table: %d\n", ret);
498 goto err_context_fini;
/* Undo ivpu_mmu_context_init() */
504 ivpu_mmu_context_fini(vdev, ctx);
/*
 * Tear down a user context: the page table is first cleared from the MMU with
 * ivpu_mmu_clear_pgtable() for ctx->id, and only then are the tables freed by
 * ivpu_mmu_context_fini(). A zero ctx->id triggers a warning.
 */
508 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
510 drm_WARN_ON(&vdev->drm, !ctx->id);
512 ivpu_mmu_clear_pgtable(vdev, ctx->id);
513 ivpu_mmu_context_fini(vdev, ctx);