From 1e4a00334add40f609162914af7a24bc92951008 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:31:32 -0500 Subject: [PATCH] drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3 Treat system memory on NUMA systems as remote by default. Overriding with a more efficient MTYPE per page will be implemented in the next patch. No need for a special case for APP APUs. System memory is handled the same for carve-out and native mode. And VRAM doesn't exist in native mode. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 ++++++++++++++--------------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++++++--------- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8623b93..cf976b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1186,9 +1186,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - unsigned int mtype; - unsigned int mtype_default; + /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ + unsigned int mtype_local, mtype; bool snoop = false; + bool is_local; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): @@ -1228,35 +1229,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } break; case IP_VERSION(9, 4, 3): - /* FIXME: Needs more work for handling multiple memory - * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU - * modes. - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + /* Only local VRAM BOs or system memory on non-NUMA APUs + * can be assumed to be local in their entirety. Choose + * MTYPE_NC as safe fallback for all system memory BOs on + * NUMA systems. Their MTYPE can be overridden per-page in + * gmc_v9_0_override_vm_pte_flags. */ - mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + is_local = (!is_vram && (adev->flags & AMD_IS_APU) && + num_possible_nodes() <= 1) || + (is_vram && adev == bo_adev /* TODO: memory partitions && + bo->mem_id == vm->mem_id*/); snoop = true; if (uncached) { mtype = MTYPE_UC; - } else if (adev->gmc.is_app_apu) { - /* FIXME: APU in native mode, NPS1 single socket only - * - * For suporting NUMA partitioned APU e.g. in NPS4 mode, - * this need to look at the NUMA node on which the - * system memory allocation was done. - * - * Memory access by a different partition within same - * socket should be treated as remote access so MTYPE_RW - * cannot be used always. - */ - mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { - /* APU on carve out mode */ - mtype = mtype_default; + mtype = is_local ? mtype_local : MTYPE_NC; } else { /* dGPU */ - if (is_vram && bo_adev == adev) - mtype = mtype_default; + if (is_local) + mtype = mtype_local; else if (is_vram) mtype = MTYPE_NC; else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 477ef92..4eec75b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1151,6 +1151,7 @@ svm_range_get_pte_flags(struct kfd_node *node, bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; + unsigned int mtype_local; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; @@ -1191,19 +1192,16 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - //TODO: Need more work for handling multiple memory partitions - //e.g. NPS4. Current approch is only applicable without memory - //partitions. + mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : + AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 - */ - if (bo_node == node) - mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + /* local HBM region close to partition */ + if (bo_node->adev == node->adev /* TODO: memory partitions && + bo_node->mem_id == node->mem_id*/) + mapping_flags |= mtype_local; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -1212,7 +1210,13 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= AMDGPU_VM_MTYPE_UC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { - mapping_flags |= AMDGPU_VM_MTYPE_NC; + /* On NUMA systems, locality is determined per-page + * in amdgpu_gmc_override_vm_pte_flags + */ + if (num_possible_nodes() <= 1) + mapping_flags |= mtype_local; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- 2.7.4