drm/amdgpu: Correct amdgpu_amdkfd_total_mem_size calculation
author Philip Yang <Philip.Yang@amd.com>
Mon, 3 Oct 2022 21:53:25 +0000 (17:53 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Oct 2022 16:08:18 +0000 (12:08 -0400)
amdkfd_total_mem_size is the total VRAM size of all GPUs plus system
memory. It is used to estimate page table memory usage and to leave
enough VRAM room for page table allocation.

Calculating amdkfd_total_mem_size in amdgpu_amdkfd_device_probe is
incorrect because adev->gmc.real_vram_size is still 0 when the probe is
called from amdgpu_device_ip_early_init. Move the calculation
to amdgpu_amdkfd_device_init to get the correct VRAM size.

Do the reverse calculation in amdgpu_amdkfd_device_fini_sw to support
hot-unplugging GPUs.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 9e98f38..03bbfaa 100644 (file)
@@ -75,9 +75,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
                return;
 
        adev->kfd.dev = kgd2kfd_probe(adev, vf);
-
-       if (adev->kfd.dev)
-               amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 }
 
 /**
@@ -201,6 +198,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
                adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
                                                adev_to_drm(adev), &gpu_resources);
 
+               amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
                INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
        }
 }
@@ -210,6 +209,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
        if (adev->kfd.dev) {
                kgd2kfd_device_exit(adev->kfd.dev);
                adev->kfd.dev = NULL;
+               amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
        }
 }