drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3
author Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Sat, 28 Jan 2023 02:57:00 +0000 (21:57 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:53:52 +0000 (09:53 -0400)
[For 1P NPS1 mode driver bringup]

These changes are required to initialize the amdgpu driver with frontdoor
firmware loading and discovery=2 with the native mode SBIOS that enables
unified, interleaved CPU-GPU memory.

sudo modprobe amdgpu discovery=2

Once the PSP TMR region is reported via the ACPI interface, the dependency
on ip_discovery.bin will be removed.

The choice of where to allocate the driver table is given to each IP
version. In general, both the GTT and VRAM domains are considered. If any
one of the tables is strictly restricted to the VRAM domain, then only the
VRAM domain is considered.

Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
(lijo: Modified the handling for SMU Tables)
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index af37f2e..4e179e5 100644 (file)
@@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
        (*mem)->dmabuf = dma_buf;
        (*mem)->bo = bo;
        (*mem)->va = va;
-       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
                AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
index 9f0d5f0..f431205 100644 (file)
@@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = {
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
        /* On A+A platform, VRAM can be mapped as WB */
-       if (!adev->gmc.xgmi.connected_to_cpu) {
+       if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
                /* reserve PAT memory space to WC for VRAM */
                int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
                                adev->gmc.aper_size);
index 863fa33..4395c53 100644 (file)
@@ -476,7 +476,8 @@ static int psp_sw_init(void *handle)
                return ret;
 
        ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     AMDGPU_GEM_DOMAIN_VRAM |
+                                     AMDGPU_GEM_DOMAIN_GTT,
                                      &psp->fence_buf_bo,
                                      &psp->fence_buf_mc_addr,
                                      &psp->fence_buf);
@@ -484,7 +485,8 @@ static int psp_sw_init(void *handle)
                goto failed1;
 
        ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     AMDGPU_GEM_DOMAIN_VRAM |
+                                     AMDGPU_GEM_DOMAIN_GTT,
                                      &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
                                      (void **)&psp->cmd_buf_mem);
        if (ret)
index 6bbe3b8..bc11ae5 100644 (file)
@@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
                ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
        }
 
-       ret = amdgpu_bo_create_kernel_at(adev,
-                                        adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
-                                        adev->mman.discovery_tmr_size,
-                                        &adev->mman.discovery_memory,
-                                        NULL);
-       if (ret) {
-               DRM_ERROR("alloc tmr failed(%d)!\n", ret);
-               amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-               return ret;
+       if (!adev->gmc.is_app_apu) {
+               ret = amdgpu_bo_create_kernel_at(adev,
+                                                adev->gmc.real_vram_size -
+                                                adev->mman.discovery_tmr_size,
+                                                adev->mman.discovery_tmr_size,
+                                                &adev->mman.discovery_memory,
+                                                NULL);
+               if (ret) {
+                       DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+                       amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+                       return ret;
+               }
+       } else {
+               DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
        }
 
        return 0;
@@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
                                adev->gmc.visible_vram_size);
 
-       else
+       else if (!adev->gmc.is_app_apu)
 #endif
                adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
                                adev->gmc.visible_vram_size);
+       else
+               DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n");
 #endif
 
        /*
@@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
         * This is used for VGA emulation and pre-OS scanout buffers to
         * avoid display artifacts while transitioning between pre-OS
         * and driver.  */
-       r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
-                                      &adev->mman.stolen_vga_memory,
-                                      NULL);
-       if (r)
-               return r;
-       r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
-                                      adev->mman.stolen_extended_size,
-                                      &adev->mman.stolen_extended_memory,
-                                      NULL);
-       if (r)
-               return r;
-       r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
-                                      adev->mman.stolen_reserved_size,
-                                      &adev->mman.stolen_reserved_memory,
-                                      NULL);
-       if (r)
-               return r;
+       if (!adev->gmc.is_app_apu) {
+               r = amdgpu_bo_create_kernel_at(adev, 0,
+                                              adev->mman.stolen_vga_size,
+                                              &adev->mman.stolen_vga_memory,
+                                              NULL);
+               if (r)
+                       return r;
+
+               r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+                                              adev->mman.stolen_extended_size,
+                                              &adev->mman.stolen_extended_memory,
+                                              NULL);
+
+               if (r)
+                       return r;
+
+               r = amdgpu_bo_create_kernel_at(adev,
+                                              adev->mman.stolen_reserved_offset,
+                                              adev->mman.stolen_reserved_size,
+                                              &adev->mman.stolen_reserved_memory,
+                                              NULL);
+               if (r)
+                       return r;
+       } else {
+               DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+       }
 
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                DRM_ERROR("Failed initializing oa heap.\n");
                return r;
        }
-
        if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                AMDGPU_GEM_DOMAIN_GTT,
                                &adev->mman.sdma_access_bo, NULL,
@@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
        amdgpu_ttm_training_reserve_vram_fini(adev);
        /* return the stolen vga memory back to VRAM */
-       amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
-       amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
-       /* return the IP Discovery TMR memory back to VRAM */
-       amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-       if (adev->mman.stolen_reserved_size)
-               amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
-                                     NULL, NULL);
+       if (!adev->gmc.is_app_apu) {
+               amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+               amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+               /* return the IP Discovery TMR memory back to VRAM */
+               amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+               if (adev->mman.stolen_reserved_size)
+                       amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+                                             NULL, NULL);
+       }
        amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
                                        &adev->mman.sdma_access_ptr);
        amdgpu_ttm_fw_reserve_vram_fini(adev);
@@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
        int r;
 
        if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
-           adev->mman.buffer_funcs_enabled == enable)
+           adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
                return;
 
        if (enable) {
index df63dc3..bc5d126 100644 (file)
@@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        bp.size = amdgpu_vm_pt_size(adev, level);
        bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
-       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+       if (!adev->gmc.is_app_apu)
+               bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       else
+               bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
        bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
        bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
                AMDGPU_GEM_CREATE_CPU_GTT_USWC;
index 1f1268c..42877c4 100644 (file)
@@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
                adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
        if (mec_hpd_size) {
                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-                                             AMDGPU_GEM_DOMAIN_VRAM,
+                                             AMDGPU_GEM_DOMAIN_VRAM |
+                                             AMDGPU_GEM_DOMAIN_GTT,
                                              &adev->gfx.mec.hpd_eop_obj,
                                              &adev->gfx.mec.hpd_eop_gpu_addr,
                                              (void **)&hpd);
index 16634a7..245de27 100644 (file)
@@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        int r;
 
        /* size in MB on si */
-       adev->gmc.mc_vram_size =
-               adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+       if (!adev->gmc.is_app_apu) {
+               adev->gmc.mc_vram_size =
+                       adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+       } else {
+               DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+               adev->gmc.mc_vram_size = 0;
+       }
        adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
 
        if (!(adev->flags & AMD_IS_APU) &&
index 8b9acce..f85ac4d 100644 (file)
@@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
        if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
            dev->kfd->local_mem_info.local_mem_size_public > 0)
                return true;
+
+       if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) {
+               pr_debug("APP APU, Consider like a large bar system\n");
+               return true;
+       }
+
        return false;
 }
 
index 1647592..1aaf933 100644 (file)
@@ -30,6 +30,9 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
+/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
+#define DUMMY_VRAM_SIZE 31138512896
+
 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  * GPU processor ID are expressed with Bit[31]=1.
  * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
 
                        props->heap_type = heap_type;
                        props->flags = flags;
+                       if (size_in_bytes == 0)
+                               size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
                        props->size_in_bytes = size_in_bytes;
                        props->width = width;
 
index 2ddf519..4dea79a 100644 (file)
@@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu)
                }
        }
 
+       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
        /* VRAM allocation for driver table */
        for (i = 0; i < SMU_TABLE_COUNT; i++) {
                if (tables[i].size == 0)
                        continue;
 
+               /* If one of the tables has VRAM domain restriction, keep it in
+                * VRAM
+                */
+               if ((tables[i].domain &
+                   (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) ==
+                           AMDGPU_GEM_DOMAIN_VRAM)
+                       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
+
                if (i == SMU_TABLE_PMSTATUSLOG)
                        continue;
 
@@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu)
 
        driver_table->size = max_table_size;
        driver_table->align = PAGE_SIZE;
-       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
 
        ret = amdgpu_bo_create_kernel(adev,
                                      driver_table->size,
index ea8f3d6..8969b3f 100644 (file)
@@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
        SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
-                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+                      PAGE_SIZE,
+                      AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
        SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
-                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+                      PAGE_SIZE,
+                      AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
        smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
        if (!smu_table->metrics_table)