habanalabs: split the host MMU properties
author     Omer Shpigelman <oshpigelman@habana.ai>
           Sun, 5 Jan 2020 09:05:45 +0000 (09:05 +0000)
committer  Oded Gabbay <oded.gabbay@gmail.com>
           Tue, 24 Mar 2020 08:54:16 +0000 (10:54 +0200)
Host memory may be allocated with huge pages.
A different virtual range may be used for mapping in this case.
Add Huge PCI MMU (HPMMU) properties to support it.
This patch is a prerequisite for supporting future ASICs and has no effect
on the Goya ASIC, as it currently uses a single virtual host range for all
page sizes.
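
With this split, the translation properties to use are chosen by page size
instead of consulting a huge_page_size field. A minimal sketch of the
selection this enables (pick_mmu_props() is a hypothetical helper name; the
patch open-codes the same logic in hl_mmu_map()/hl_mmu_unmap() in mmu.c):

    /* Sketch only: choose MMU translation properties by address origin
     * and by page size. Not part of the patch itself.
     */
    static struct hl_mmu_properties *
    pick_mmu_props(struct asic_fixed_properties *prop, bool is_dram_addr,
                    u32 page_size)
    {
            if (is_dram_addr)
                    return &prop->dmmu;
            else if ((page_size % prop->pmmu_huge.page_size) == 0)
                    return &prop->pmmu_huge;
            else
                    return &prop->pmmu;
    }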

Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/debugfs.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/goya/goya_coresight.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/memory.c
drivers/misc/habanalabs/mmu.c

diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 20413e3..599d17d 100644
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
        }
 
        is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+                                               prop->dmmu.start_addr,
+                                               prop->dmmu.end_addr);
 
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
        mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
                goto out;
 
        if (hdev->dram_supports_virtual_memory &&
-                       addr >= prop->va_space_dram_start_address &&
-                       addr < prop->va_space_dram_end_address)
+               (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
                return true;
 
-       if (addr >= prop->va_space_host_start_address &&
-                       addr < prop->va_space_host_end_address)
+       if (addr >= prop->pmmu.start_addr &&
+               addr < prop->pmmu.end_addr)
+               return true;
+
+       if (addr >= prop->pmmu_huge.start_addr &&
+               addr < prop->pmmu_huge.end_addr)
                return true;
 out:
        return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
        }
 
        is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+                                               prop->dmmu.start_addr,
+                                               prop->dmmu.end_addr);
 
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
        mutex_lock(&ctx->mmu_lock);
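
Both debugfs hunks classify an address as DRAM via hl_mem_area_inside_range().
For context, its semantics are roughly the following (a sketch of the existing
helper, not code added by this patch):

    /* Sketch: true iff the area [address, address + size) lies entirely
     * inside the range [range_start, range_end].
     */
    static bool area_inside_range(u64 address, u32 size, u64 range_start,
                                    u64 range_end)
    {
            return (address >= range_start) &&
                   (address + size <= range_end);
    }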
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3c67948..74785cc 100644
@@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
        prop->dmmu.hop2_mask = HOP2_MASK;
        prop->dmmu.hop3_mask = HOP3_MASK;
        prop->dmmu.hop4_mask = HOP4_MASK;
-       prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+       prop->dmmu.start_addr = VA_DDR_SPACE_START;
+       prop->dmmu.end_addr = VA_DDR_SPACE_END;
+       prop->dmmu.page_size = PAGE_SIZE_2MB;
 
-       /* No difference between PMMU and DMMU except of page size */
+       /* shifts and masks are the same in PMMU and DMMU */
        memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
-       prop->dmmu.page_size = PAGE_SIZE_2MB;
+       prop->pmmu.start_addr = VA_HOST_SPACE_START;
+       prop->pmmu.end_addr = VA_HOST_SPACE_END;
        prop->pmmu.page_size = PAGE_SIZE_4KB;
 
-       prop->va_space_host_start_address = VA_HOST_SPACE_START;
-       prop->va_space_host_end_address = VA_HOST_SPACE_END;
-       prop->va_space_dram_start_address = VA_DDR_SPACE_START;
-       prop->va_space_dram_end_address = VA_DDR_SPACE_END;
-       prop->dram_size_for_default_page_mapping =
-                       prop->va_space_dram_end_address;
+       /* PMMU and HPMMU are the same except for the page size */
+       memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+       prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+       prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
        /*
         * WA for HW-23.
         * We can't allow user to read from Host using QMANs other than 1.
+        * PMMU and HPMMU addresses are equal, check only one of them.
         */
        if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
                hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
                                le32_to_cpu(user_dma_pkt->tsize),
-                               hdev->asic_prop.va_space_host_start_address,
-                               hdev->asic_prop.va_space_host_end_address)) {
+                               hdev->asic_prop.pmmu.start_addr,
+                               hdev->asic_prop.pmmu.end_addr)) {
                dev_err(hdev->dev,
                        "Can't DMA from host on queue other then 1\n");
                return -EFAULT;
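
On Goya the HPMMU inherits the PMMU virtual range wholesale, so behavior is
unchanged. A future ASIC with a dedicated huge-page range would diverge
roughly as follows (the VA_HOST_SPACE_HPAGE_* symbols are hypothetical and
not part of this patch):

    /* Hypothetical future-ASIC setup: give the HPMMU its own virtual
     * range instead of copying the PMMU's, and flag the device so a
     * separate hl_va_range is initialized per context.
     */
    memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
    prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
    prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; /* hypothetical */
    prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;     /* hypothetical */
    hdev->pmmu_huge_range = true;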
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index c1ee6e2..a1bc930 100644
@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
        u64 range_start, range_end;
 
        if (hdev->mmu_enable) {
-               range_start = prop->va_space_dram_start_address;
-               range_end = prop->va_space_dram_end_address;
+               range_start = prop->dmmu.start_addr;
+               range_end = prop->dmmu.end_addr;
        } else {
                range_start = prop->dram_user_base_address;
                range_end = prop->dram_end_address;
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index df34227..5c751b9 100644
@@ -132,6 +132,8 @@ enum hl_device_hw_state {
 
 /**
  * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
  * @hop0_shift: shift of hop 0 mask.
  * @hop1_shift: shift of hop 1 mask.
  * @hop2_shift: shift of hop 2 mask.
@@ -143,9 +145,10 @@ enum hl_device_hw_state {
  * @hop3_mask: mask to get the PTE address in hop 3.
  * @hop4_mask: mask to get the PTE address in hop 4.
  * @page_size: default page size used to allocate memory.
- * @huge_page_size: page size used to allocate memory with huge pages.
  */
 struct hl_mmu_properties {
+       u64     start_addr;
+       u64     end_addr;
        u64     hop0_shift;
        u64     hop1_shift;
        u64     hop2_shift;
@@ -157,7 +160,6 @@ struct hl_mmu_properties {
        u64     hop3_mask;
        u64     hop4_mask;
        u32     page_size;
-       u32     huge_page_size;
 };
 
 /**
@@ -169,6 +171,8 @@ struct hl_mmu_properties {
  * @preboot_ver: F/W Preboot version.
  * @dmmu: DRAM MMU address translation properties.
  * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ *              allocated with huge pages.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -178,14 +182,6 @@ struct hl_mmu_properties {
  * @dram_size: DRAM total size.
  * @dram_pci_bar_size: size of PCI bar towards DRAM.
  * @max_power_default: max power of the device after reset
- * @va_space_host_start_address: base address of virtual memory range for
- *                               mapping host memory.
- * @va_space_host_end_address: end address of virtual memory range for
- *                             mapping host memory.
- * @va_space_dram_start_address: base address of virtual memory range for
- *                               mapping DRAM memory.
- * @va_space_dram_end_address: end address of virtual memory range for
- *                             mapping DRAM memory.
  * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
  *                                      fault.
  * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -218,6 +214,7 @@ struct asic_fixed_properties {
        char                            preboot_ver[VERSION_MAX_LEN];
        struct hl_mmu_properties        dmmu;
        struct hl_mmu_properties        pmmu;
+       struct hl_mmu_properties        pmmu_huge;
        u64                             sram_base_address;
        u64                             sram_end_address;
        u64                             sram_user_base_address;
@@ -227,10 +224,6 @@ struct asic_fixed_properties {
        u64                             dram_size;
        u64                             dram_pci_bar_size;
        u64                             max_power_default;
-       u64                             va_space_host_start_address;
-       u64                             va_space_host_end_address;
-       u64                             va_space_dram_start_address;
-       u64                             va_space_dram_end_address;
        u64                             dram_size_for_default_page_mapping;
        u64                             pcie_dbi_base_address;
        u64                             pcie_aux_dbi_reg_addr;
@@ -658,6 +651,8 @@ struct hl_va_range {
  *             this hits 0. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of DMA fence objects representing pending CS.
  * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ *                      with huge pages.
  * @dram_va_range: holds available virtual addresses for DRAM mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
@@ -688,8 +683,9 @@ struct hl_ctx {
        struct hl_device        *hdev;
        struct kref             refcount;
        struct dma_fence        *cs_pending[HL_MAX_PENDING_CS];
-       struct hl_va_range      host_va_range;
-       struct hl_va_range      dram_va_range;
+       struct hl_va_range      *host_va_range;
+       struct hl_va_range      *host_huge_va_range;
+       struct hl_va_range      *dram_va_range;
        struct mutex            mem_hash_lock;
        struct mutex            mmu_lock;
        struct list_head        debugfs_list;
@@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts {
  *                   otherwise.
  * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
  * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual address range used for PMMU with
+ *                   huge pages.
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
@@ -1372,6 +1370,7 @@ struct hl_device {
        u8                              reset_on_lockup;
        u8                              dram_supports_virtual_memory;
        u8                              dram_default_page_mapping;
+       u8                              pmmu_huge_range;
        u8                              init_done;
        u8                              device_cpu_disabled;
        u8                              dma_mask;
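
The per-context hl_va_range members become pointers so that, on ASICs without
a dedicated huge-page range, host_huge_va_range can simply alias
host_va_range. A sketch of the resulting contract (the real init/teardown is
in memory.c below):

    /* Sketch: with no dedicated huge-page range, both pointers name the
     * same object, which must then be torn down exactly once.
     */
    if (!hdev->pmmu_huge_range)
            ctx->host_huge_va_range = ctx->host_va_range;

    /* teardown */
    if (hdev->pmmu_huge_range)
            va_range_fini(hdev, ctx->host_huge_va_range);
    va_range_fini(hdev, ctx->host_va_range);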
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index b612b1a..a72f766 100644
@@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
                 * or not, hence we continue with the biggest possible
                 * granularity.
                 */
-               page_size = hdev->asic_prop.pmmu.huge_page_size;
+               page_size = hdev->asic_prop.pmmu_huge.page_size;
        else
                page_size = hdev->asic_prop.dmmu.page_size;
 
@@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
                                struct hl_userptr *userptr,
                                struct hl_vm_phys_pg_pack **pphys_pg_pack)
 {
-       struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        struct scatterlist *sg;
        dma_addr_t dma_addr;
        u64 page_mask, total_npages;
        u32 npages, page_size = PAGE_SIZE,
-               huge_page_size = mmu_prop->huge_page_size;
+               huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
        bool first = true, is_huge_page_opt = true;
        int rc, i, j;
        u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
@@ -856,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        struct hl_userptr *userptr = NULL;
        struct hl_vm_hash_node *hnode;
+       struct hl_va_range *va_range;
        enum vm_type_t *vm_type;
        u64 ret_vaddr, hint_addr;
        u32 handle = 0;
@@ -927,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                goto hnode_err;
        }
 
-       ret_vaddr = get_va_block(hdev,
-                       is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-                       phys_pg_pack->total_size, hint_addr, is_userptr);
+       if (is_userptr)
+               if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
+       else
+               va_range = ctx->dram_va_range;
+
+       ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+                                       hint_addr, is_userptr);
        if (!ret_vaddr) {
                dev_err(hdev->dev, "no available va block for handle %u\n",
                                handle);
@@ -968,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        return 0;
 
 map_err:
-       if (add_va_block(hdev,
-                       is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-                       ret_vaddr,
-                       ret_vaddr + phys_pg_pack->total_size - 1))
+       if (add_va_block(hdev, va_range, ret_vaddr,
+                               ret_vaddr + phys_pg_pack->total_size - 1))
                dev_warn(hdev->dev,
                        "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
                                handle, ret_vaddr);
@@ -1033,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 
        if (*vm_type == VM_TYPE_USERPTR) {
                is_userptr = true;
-               va_range = &ctx->host_va_range;
                userptr = hnode->ptr;
                rc = init_phys_pg_pack_from_userptr(ctx, userptr,
                                                        &phys_pg_pack);
@@ -1043,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
                                vaddr);
                        goto vm_type_err;
                }
+
+               if (phys_pg_pack->page_size ==
+                                       hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
        } else if (*vm_type == VM_TYPE_PHYS_PACK) {
                is_userptr = false;
-               va_range = &ctx->dram_va_range;
+               va_range = ctx->dram_va_range;
                phys_pg_pack = hnode->ptr;
        } else {
                dev_warn(hdev->dev,
@@ -1441,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 }
 
 /*
- * hl_va_range_init - initialize virtual addresses range
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_range            : pointer to the range to initialize
- * @start               : range start address
- * @end                 : range end address
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
  *   addresses.
  */
-static int hl_va_range_init(struct hl_device *hdev,
-               struct hl_va_range *va_range, u64 start, u64 end)
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+                               u64 start, u64 end)
 {
        int rc;
 
@@ -1488,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
 }
 
 /*
- * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * @va_range: pointer to virtual addresses range
  *
- * @ctx                 : pointer to the habanalabs context structure
- * @host_range_start    : host virtual addresses range start
- * @host_range_end      : host virtual addresses range end
- * @dram_range_start    : dram virtual addresses range start
- * @dram_range_end      : dram virtual addresses range end
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+               struct hl_va_range *va_range)
+{
+       mutex_lock(&va_range->lock);
+       clear_va_list_locked(hdev, &va_range->list);
+       mutex_unlock(&va_range->lock);
+
+       mutex_destroy(&va_range->lock);
+       kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ *                          allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ *                        with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
  *
  * This function initializes the following:
  * - MMU for context
  * - Virtual address to area descriptor hashtable
  * - Virtual block list of available virtual memory
  */
-static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
-                               u64 host_range_end, u64 dram_range_start,
-                               u64 dram_range_end)
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+                                       u64 host_range_start,
+                                       u64 host_range_end,
+                                       u64 host_huge_range_start,
+                                       u64 host_huge_range_end,
+                                       u64 dram_range_start,
+                                       u64 dram_range_end)
 {
        struct hl_device *hdev = ctx->hdev;
        int rc;
 
+       ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+       if (!ctx->host_va_range)
+               return -ENOMEM;
+
+       ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+                                               GFP_KERNEL);
+       if (!ctx->host_huge_va_range) {
+               rc = -ENOMEM;
+               goto host_huge_va_range_err;
+       }
+
+       ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+       if (!ctx->dram_va_range) {
+               rc = -ENOMEM;
+               goto dram_va_range_err;
+       }
+
        rc = hl_mmu_ctx_init(ctx);
        if (rc) {
                dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
-               return rc;
+               goto mmu_ctx_err;
        }
 
        mutex_init(&ctx->mem_hash_lock);
        hash_init(ctx->mem_hash);
 
-       mutex_init(&ctx->host_va_range.lock);
+       mutex_init(&ctx->host_va_range->lock);
 
-       rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
-                       host_range_end);
+       rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+                               host_range_end);
        if (rc) {
                dev_err(hdev->dev, "failed to init host vm range\n");
-               goto host_vm_err;
+               goto host_page_range_err;
+       }
+
+       if (hdev->pmmu_huge_range) {
+               mutex_init(&ctx->host_huge_va_range->lock);
+
+               rc = va_range_init(hdev, ctx->host_huge_va_range,
+                                       host_huge_range_start,
+                                       host_huge_range_end);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to init host huge vm range\n");
+                       goto host_hpage_range_err;
+               }
+       } else {
+               kfree(ctx->host_huge_va_range);
+               ctx->host_huge_va_range = ctx->host_va_range;
        }
 
-       mutex_init(&ctx->dram_va_range.lock);
+       mutex_init(&ctx->dram_va_range->lock);
 
-       rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+       rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
                        dram_range_end);
        if (rc) {
                dev_err(hdev->dev, "failed to init dram vm range\n");
@@ -1540,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
        return 0;
 
 dram_vm_err:
-       mutex_destroy(&ctx->dram_va_range.lock);
+       mutex_destroy(&ctx->dram_va_range->lock);
 
-       mutex_lock(&ctx->host_va_range.lock);
-       clear_va_list_locked(hdev, &ctx->host_va_range.list);
-       mutex_unlock(&ctx->host_va_range.lock);
-host_vm_err:
-       mutex_destroy(&ctx->host_va_range.lock);
+       if (hdev->pmmu_huge_range) {
+               mutex_lock(&ctx->host_huge_va_range->lock);
+               clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+               mutex_unlock(&ctx->host_huge_va_range->lock);
+       }
+host_hpage_range_err:
+       if (hdev->pmmu_huge_range)
+               mutex_destroy(&ctx->host_huge_va_range->lock);
+       mutex_lock(&ctx->host_va_range->lock);
+       clear_va_list_locked(hdev, &ctx->host_va_range->list);
+       mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+       mutex_destroy(&ctx->host_va_range->lock);
        mutex_destroy(&ctx->mem_hash_lock);
        hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+       kfree(ctx->dram_va_range);
+dram_va_range_err:
+       kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+       kfree(ctx->host_va_range);
 
        return rc;
 }
@@ -1556,8 +1637,8 @@ host_vm_err:
 int hl_vm_ctx_init(struct hl_ctx *ctx)
 {
        struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
-       u64 host_range_start, host_range_end, dram_range_start,
-               dram_range_end;
+       u64 host_range_start, host_range_end, host_huge_range_start,
+               host_huge_range_end, dram_range_start, dram_range_end;
 
        atomic64_set(&ctx->dram_phys_mem, 0);
 
@@ -1569,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
         *   address of the memory related to the given handle.
         */
        if (ctx->hdev->mmu_enable) {
-               dram_range_start = prop->va_space_dram_start_address;
-               dram_range_end = prop->va_space_dram_end_address;
-               host_range_start = prop->va_space_host_start_address;
-               host_range_end = prop->va_space_host_end_address;
+               dram_range_start = prop->dmmu.start_addr;
+               dram_range_end = prop->dmmu.end_addr;
+               host_range_start = prop->pmmu.start_addr;
+               host_range_end = prop->pmmu.end_addr;
+               host_huge_range_start = prop->pmmu_huge.start_addr;
+               host_huge_range_end = prop->pmmu_huge.end_addr;
        } else {
                dram_range_start = prop->dram_user_base_address;
                dram_range_end = prop->dram_end_address;
                host_range_start = prop->dram_user_base_address;
                host_range_end = prop->dram_end_address;
+               host_huge_range_start = prop->dram_user_base_address;
+               host_huge_range_end = prop->dram_end_address;
        }
 
-       return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-                       dram_range_start, dram_range_end);
-}
-
-/*
- * hl_va_range_fini     - clear a virtual addresses range
- *
- * @hdev                : pointer to the habanalabs structure
- * va_range             : pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void hl_va_range_fini(struct hl_device *hdev,
-               struct hl_va_range *va_range)
-{
-       mutex_lock(&va_range->lock);
-       clear_va_list_locked(hdev, &va_range->list);
-       mutex_unlock(&va_range->lock);
-
-       mutex_destroy(&va_range->lock);
+       return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+                                       host_huge_range_start,
+                                       host_huge_range_end,
+                                       dram_range_start,
+                                       dram_range_end);
 }
 
 /*
@@ -1667,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
                }
        spin_unlock(&vm->idr_lock);
 
-       hl_va_range_fini(hdev, &ctx->dram_va_range);
-       hl_va_range_fini(hdev, &ctx->host_va_range);
+       va_range_fini(hdev, ctx->dram_va_range);
+       if (hdev->pmmu_huge_range)
+               va_range_fini(hdev, ctx->host_huge_va_range);
+       va_range_fini(hdev, ctx->host_va_range);
 
        mutex_destroy(&ctx->mem_hash_lock);
        hl_mmu_ctx_fini(ctx);
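
For clarity, the range selection that map_device_va() and unmap_device_va()
now share can be summarized as follows (select_va_range() is a hypothetical
name; the patch open-codes this at both call sites):

    /* Sketch: pick the VA range by origin, and for host memory by
     * whether the physical page pack was built from huge pages.
     */
    static struct hl_va_range *select_va_range(struct hl_ctx *ctx,
                    bool is_userptr, u32 pack_page_size)
    {
            if (!is_userptr)
                    return ctx->dram_va_range;

            if (pack_page_size == ctx->hdev->asic_prop.pmmu.page_size)
                    return ctx->host_va_range;

            return ctx->host_huge_va_range;
    }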
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 006eee4..a290d6b 100644
@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
        return phys_hop_addr + pte_offset;
 }
 
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+       return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                                       prop->dmmu.start_addr,
+                                       prop->dmmu.end_addr);
+}
+
 static int dram_default_mapping_init(struct hl_ctx *ctx)
 {
        struct hl_device *hdev = ctx->hdev;
@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
                curr_pte;
        bool is_huge, clear_hop3 = true;
 
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
        hop0_addr = get_hop0_addr(ctx);
@@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
        if (!hdev->mmu_enable)
                return 0;
 
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       is_dram_addr = is_dram_va(hdev, virt_addr);
 
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;
 
        /*
         * The H/W handles mapping of specific page sizes. Hence if the page
         * size is bigger, we break it to sub-pages and unmap them separately.
         */
-       if ((page_size % mmu_prop->huge_page_size) == 0) {
-               real_page_size = mmu_prop->huge_page_size;
-       } else if ((page_size % mmu_prop->page_size) == 0) {
+       if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
-                       page_size,
-                       mmu_prop->page_size >> 10,
-                       mmu_prop->huge_page_size >> 20);
+                       "page size of %u is not %uKB aligned, can't unmap\n",
+                       page_size, mmu_prop->page_size >> 10);
 
                return -EFAULT;
        }
@@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                hop4_new = false, is_huge;
        int rc = -ENOMEM;
 
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
        /*
         * This mapping function can map a page or a huge page. For huge page
         * there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
         * one of the two page sizes. Since this is a common code for all the
         * three cases, we need this hugs page check.
         */
-       is_huge = page_size == mmu_prop->huge_page_size;
-
-       if (is_dram_addr && !is_huge) {
-               dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
-               return -EFAULT;
+       if (is_dram_addr) {
+               mmu_prop = &prop->dmmu;
+               is_huge = true;
+       } else if (page_size == prop->pmmu_huge.page_size) {
+               mmu_prop = &prop->pmmu_huge;
+               is_huge = true;
+       } else {
+               mmu_prop = &prop->pmmu;
+               is_huge = false;
        }
 
        hop0_addr = get_hop0_addr(ctx);
@@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
        if (!hdev->mmu_enable)
                return 0;
 
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       is_dram_addr = is_dram_va(hdev, virt_addr);
 
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;
 
        /*
         * The H/W handles mapping of specific page sizes. Hence if the page
         * size is bigger, we break it to sub-pages and map them separately.
         */
-       if ((page_size % mmu_prop->huge_page_size) == 0) {
-               real_page_size = mmu_prop->huge_page_size;
-       } else if ((page_size % mmu_prop->page_size) == 0) {
+       if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
-                       page_size,
-                       mmu_prop->page_size >> 10,
-                       mmu_prop->huge_page_size >> 20);
+                       "page size of %u is not %uKB aligned, can't map\n",
+                       page_size, mmu_prop->page_size >> 10);
 
                return -EFAULT;
        }
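
As before, a mapping whose page size is larger than what the H/W supports is
split into sub-pages; only the properties used for the split change. With
Goya's sizes the arithmetic looks like this (illustrative values only):

    /* Illustrative: an 8MB host mapping with 2MB HPMMU pages is broken
     * into four H/W mappings of real_page_size each.
     */
    u32 page_size = 8 * 1024 * 1024;                /* caller's size  */
    u32 real_page_size = prop->pmmu_huge.page_size; /* 2MB on Goya    */
    u32 npages = page_size / real_page_size;        /* 4 H/W mappings */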