From b453e42a6e8b9fa4580011e923963248c56b9d4d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 5 May 2021 15:18:53 -0400 Subject: [PATCH] drm/amdgpu: Add new placement for preemptible SG BOs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit SG BOs such as dmabuf imports and userptr BOs do not consume system resources directly. Instead they point to resources owned elsewhere. They typically get evicted by DMABuf move notifiers of MMU notifiers. If those notifiers don't need to wait for hardware fences (i.e. the SG BOs are used in a preemptible context), then we don't need to limit them to the GTT size and we don't need TTM to evict them. Create a new placement for such preemptible SG BOs that does not impose artificial size limits and TTM evictions. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 190 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 11 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +- include/uapi/drm/amdgpu_drm.h | 4 + 7 files changed, 247 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6331a11..6cf0fe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ + amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ + amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \ + amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8714d50..6b09d79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -133,7 +133,9 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; - places[c].mem_type = TTM_PL_TT; + places[c].mem_type = + abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? + AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; c++; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c new file mode 100644 index 0000000..d607f31 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2016-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König, Felix Kuehling + */ + +#include "amdgpu.h" + +static inline struct amdgpu_preempt_mgr * +to_preempt_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct amdgpu_preempt_mgr, manager); +} + +/** + * DOC: mem_info_preempt_used + * + * The amdgpu driver provides a sysfs API for reporting current total amount of + * used preemptible memory. + * The file mem_info_preempt_used is used for this, and returns the current + * used size of the preemptible block, in bytes + */ +static ssize_t mem_info_preempt_used_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct ttm_resource_manager *man; + + man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT); + return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man)); +} + +static DEVICE_ATTR_RO(mem_info_preempt_used); + +/** + * amdgpu_preempt_mgr_new - allocate a new node + * + * @man: TTM memory type manager + * @tbo: TTM BO we need this range for + * @place: placement flags and restrictions + * @mem: the resulting mem object + * + * Dummy, just count the space used without allocating resources or any limit. + */ +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource *mem) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + atomic64_add(mem->num_pages, &mgr->used); + + mem->mm_node = NULL; + mem->start = AMDGPU_BO_INVALID_OFFSET; + return 0; +} + +/** + * amdgpu_preempt_mgr_del - free ranges + * + * @man: TTM memory type manager + * @mem: TTM memory object + * + * Free the allocated GTT again. + */ +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *mem) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + atomic64_sub(mem->num_pages, &mgr->used); +} + +/** + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain + * + * @man: TTM memory type manager + * + * Return how many bytes are used in the GTT domain + */ +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + s64 result = atomic64_read(&mgr->used); + + return (result > 0 ? result : 0) * PAGE_SIZE; +} + +/** + * amdgpu_preempt_mgr_debug - dump VRAM table + * + * @man: TTM memory type manager + * @printer: DRM printer to use + * + * Dump the table content using printk. + */ +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n", + man->size, (u64)atomic64_read(&mgr->used)); +} + +static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = { + .alloc = amdgpu_preempt_mgr_new, + .free = amdgpu_preempt_mgr_del, + .debug = amdgpu_preempt_mgr_debug +}; + +/** + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM + * + * @adev: amdgpu_device pointer + * + * Allocate and initialize the GTT manager. + */ +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + man->use_tt = true; + man->func = &amdgpu_preempt_mgr_func; + + ttm_resource_manager_init(man, (1 << 30)); + + atomic64_set(&mgr->used, 0); + + ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_preempt_used\n"); + return ret; + } + + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, + &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_preempt_mgr_fini - free and destroy GTT manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. + */ +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e039f5b..a06c8a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } break; case TTM_PL_TT: + case AMDGPU_PL_PREEMPT: default: amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); break; @@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT); /* Map only what can't be accessed directly */ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { @@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_resource *old_mem = &bo->mem; int r; - if (new_mem->mem_type == TTM_PL_TT) { + if (new_mem->mem_type == TTM_PL_TT || + new_mem->mem_type == AMDGPU_PL_PREEMPT) { r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem); if (r) return r; @@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, goto out; } if (old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_TT) { + (new_mem->mem_type == TTM_PL_TT || + new_mem->mem_type == AMDGPU_PL_PREEMPT)) { ttm_bo_move_null(bo, new_mem); goto out; } - if (old_mem->mem_type == TTM_PL_TT && + if ((old_mem->mem_type == TTM_PL_TT || + old_mem->mem_type == AMDGPU_PL_PREEMPT) && new_mem->mem_type == TTM_PL_SYSTEM) { r = ttm_bo_wait_ctx(bo, ctx); if (r) @@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, /* system memory */ return 0; case TTM_PL_TT: + case AMDGPU_PL_PREEMPT: break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; @@ -1292,7 +1298,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; - if (mem && mem->mem_type == TTM_PL_TT) { + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; if (ttm->caching == ttm_cached) @@ -1366,6 +1373,15 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, } switch (bo->mem.mem_type) { + case AMDGPU_PL_PREEMPT: + /* Preemptible BOs don't own system resources managed by the + * driver (pages, VRAM, GART space). They point to resources + * owned by someone else (e.g. pageable memory in user mode + * or a DMABuf). They are used in a preemptible context so we + * can guarantee no deadlocks and good QoS in case of MMU + * notifiers or DMABuf move notifiers from the resource owner. + */ + return false; case TTM_PL_TT: if (amdgpu_bo_is_amdgpu_bo(bo) && amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) @@ -1747,6 +1763,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned)(gtt_size / (1024 * 1024))); + /* Initialize preemptible memory pool */ + r = amdgpu_preempt_mgr_init(adev); + if (r) { + DRM_ERROR("Failed initializing PREEMPT heap.\n"); + return r; + } + /* Initialize various on-chip memory pools */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); if (r) { @@ -1791,6 +1814,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); + amdgpu_preempt_mgr_fini(adev); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); @@ -1985,6 +2009,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, return -EINVAL; } + if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) { + DRM_ERROR("Trying to clear preemptible memory.\n"); + return -EINVAL; + } + if (bo->tbo.mem.mem_type == TTM_PL_TT) { r = amdgpu_ttm_alloc_gart(&bo->tbo); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index eb84a69..2877a92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -31,6 +31,7 @@ #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) +#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 @@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr { atomic64_t available; }; +struct amdgpu_preempt_mgr { + struct ttm_resource_manager manager; + atomic64_t used; +}; + struct amdgpu_mman { struct ttm_device bdev; bool initialized; @@ -70,6 +76,7 @@ struct amdgpu_mman { struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; + struct amdgpu_preempt_mgr preempt_mgr; uint64_t stolen_vga_size; struct amdgpu_bo *stolen_vga_memory; @@ -97,6 +104,8 @@ struct amdgpu_copy_mem { int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev); int amdgpu_vram_mgr_init(struct amdgpu_device *adev); void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); @@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man); int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man); +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man); + u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo); int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, struct ttm_resource *mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 55991f3..da155c27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = &bo->tbo.mem; - if (mem->mem_type == TTM_PL_TT) + if (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT) pages_addr = bo->tbo.ttm->dma_address; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 2063a1c..dc61537d 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -136,6 +136,10 @@ extern "C" { * accessing it with various hw blocks */ #define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- 2.7.4