nvk: replace mp with tpc
authorKarol Herbst <git@karolherbst.de>
Thu, 20 Jul 2023 20:48:58 +0000 (22:48 +0200)
committerMarge Bot <emma+marge@anholt.net>
Fri, 4 Aug 2023 21:32:06 +0000 (21:32 +0000)
The TLS space is calculated per TPC, and nouveau also doesn't report the MP
count to us, but the TPC count instead.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>

src/nouveau/vulkan/nvk_device.c
src/nouveau/vulkan/nvk_device.h
src/nouveau/vulkan/nvk_queue.c
src/nouveau/vulkan/nvk_queue.h
src/nouveau/winsys/nouveau_device.c
src/nouveau/winsys/nouveau_device.h

index 1bc91e9..1e18621 100644 (file)
@@ -30,14 +30,14 @@ nvk_slm_area_finish(struct nvk_slm_area *area)
 struct nouveau_ws_bo *
 nvk_slm_area_get_bo_ref(struct nvk_slm_area *area,
                         uint32_t *bytes_per_warp_out,
-                        uint32_t *bytes_per_mp_out)
+                        uint32_t *bytes_per_tpc_out)
 {
    simple_mtx_lock(&area->mutex);
    struct nouveau_ws_bo *bo = area->bo;
    if (bo)
       nouveau_ws_bo_ref(bo);
    *bytes_per_warp_out = area->bytes_per_warp;
-   *bytes_per_mp_out = area->bytes_per_mp;
+   *bytes_per_tpc_out = area->bytes_per_tpc;
    simple_mtx_unlock(&area->mutex);
 
    return bo;
@@ -60,7 +60,7 @@ nvk_slm_area_ensure(struct nvk_device *dev,
     */
    bytes_per_warp = ALIGN(bytes_per_warp, 0x200);
 
-   uint64_t bytes_per_mp = bytes_per_warp * 64; /* max warps */
+   uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps */
 
    /* The hardware seems to require this alignment for
     * NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A_SIZE_LOWER.
@@ -69,7 +69,7 @@ nvk_slm_area_ensure(struct nvk_device *dev,
     * by the number of warps, 64.  It might matter for real on a GPU with 48
     * warps but we don't support any of those yet.
     */
-   assert(bytes_per_mp == ALIGN(bytes_per_mp, 0x8000));
+   assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
 
    /* nvk_slm_area::bytes_per_mp only ever increases so we can check this
     * outside the lock and exit early in the common case.  We only need to
@@ -78,10 +78,10 @@ nvk_slm_area_ensure(struct nvk_device *dev,
     * Also, we only care about bytes_per_mp and not bytes_per_warp because
     * they are integer multiples of each other.
     */
-   if (likely(bytes_per_mp <= area->bytes_per_mp))
+   if (likely(bytes_per_tpc <= area->bytes_per_tpc))
       return VK_SUCCESS;
 
-   uint64_t size = bytes_per_mp * dev->ws_dev->mp_count;
+   uint64_t size = bytes_per_tpc * dev->ws_dev->tpc_count;
 
    /* The hardware seems to require this alignment for
     * NV9097_SET_SHADER_LOCAL_MEMORY_D_SIZE_LOWER.
@@ -95,7 +95,7 @@ nvk_slm_area_ensure(struct nvk_device *dev,
 
    struct nouveau_ws_bo *unref_bo;
    simple_mtx_lock(&area->mutex);
-   if (bytes_per_mp <= area->bytes_per_mp) {
+   if (bytes_per_tpc <= area->bytes_per_tpc) {
       /* We lost the race, throw away our BO */
       assert(area->bytes_per_warp == bytes_per_warp);
       unref_bo = bo;
@@ -103,7 +103,7 @@ nvk_slm_area_ensure(struct nvk_device *dev,
       unref_bo = area->bo;
       area->bo = bo;
       area->bytes_per_warp = bytes_per_warp;
-      area->bytes_per_mp = bytes_per_mp;
+      area->bytes_per_tpc = bytes_per_tpc;
    }
    simple_mtx_unlock(&area->mutex);
 
index deb4947..8032fad 100644 (file)
@@ -17,7 +17,7 @@ struct nvk_slm_area {
    simple_mtx_t mutex;
    struct nouveau_ws_bo *bo;
    uint32_t bytes_per_warp;
-   uint32_t bytes_per_mp;
+   uint32_t bytes_per_tpc;
 };
 
 struct nouveau_ws_bo *
index afc703c..60b6fd0 100644 (file)
@@ -54,7 +54,7 @@ nvk_queue_state_update(struct nvk_device *dev,
                        struct nvk_queue_state *qs)
 {
    struct nouveau_ws_bo *bo;
-   uint32_t alloc_count, bytes_per_warp, bytes_per_mp;
+   uint32_t alloc_count, bytes_per_warp, bytes_per_tpc;
    bool dirty = false;
 
    bo = nvk_descriptor_table_get_bo_ref(&dev->images, &alloc_count);
@@ -96,14 +96,14 @@ nvk_queue_state_update(struct nvk_device *dev,
       }
    }
 
-   bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_mp);
+   bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
    if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
-       qs->slm.bytes_per_mp != bytes_per_mp) {
+       qs->slm.bytes_per_tpc != bytes_per_tpc) {
       if (qs->slm.bo)
          nouveau_ws_bo_destroy(qs->slm.bo);
       qs->slm.bo = bo;
       qs->slm.bytes_per_warp = bytes_per_warp;
-      qs->slm.bytes_per_mp = bytes_per_mp;
+      qs->slm.bytes_per_tpc = bytes_per_tpc;
       dirty = true;
    } else {
       /* No change */
@@ -191,8 +191,8 @@ nvk_queue_state_update(struct nvk_device *dev,
       const uint64_t slm_addr = qs->slm.bo->offset;
       const uint64_t slm_size = qs->slm.bo->size;
       const uint64_t slm_per_warp = qs->slm.bytes_per_warp;
-      const uint64_t slm_per_mp = qs->slm.bytes_per_mp;
-      assert(!(slm_per_mp & 0x7fff));
+      const uint64_t slm_per_tpc = qs->slm.bytes_per_tpc;
+      assert(!(slm_per_tpc & 0x7fff));
 
       /* Compute */
       P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_A);
@@ -200,14 +200,14 @@ nvk_queue_state_update(struct nvk_device *dev,
       P_NVA0C0_SET_SHADER_LOCAL_MEMORY_B(p, slm_addr);
 
       P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A);
-      P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(p, slm_per_mp >> 32);
-      P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(p, slm_per_mp);
+      P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(p, slm_per_tpc >> 32);
+      P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(p, slm_per_tpc);
       P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_C(p, 0xff);
 
       if (dev->pdev->info.cls_compute < VOLTA_COMPUTE_A) {
          P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_THROTTLED_A);
-         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(p, slm_per_mp >> 32);
-         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(p, slm_per_mp);
+         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(p, slm_per_tpc >> 32);
+         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(p, slm_per_tpc);
          P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_C(p, 0xff);
       }
 
index 68ecc6f..25e2059 100644 (file)
@@ -27,7 +27,7 @@ struct nvk_queue_state {
    struct {
       struct nouveau_ws_bo *bo;
       uint32_t bytes_per_warp;
-      uint32_t bytes_per_mp;
+      uint32_t bytes_per_tpc;
    } slm;
 
    struct {
index cb08463..7c6c1d1 100644 (file)
@@ -257,7 +257,7 @@ nouveau_ws_device_new(drmDevicePtr drm_device)
    if (nouveau_ws_param(fd, NOUVEAU_GETPARAM_GRAPH_UNITS, &value))
       goto out_err;
    device->gpc_count = value & 0x000000ff;
-   device->mp_count = value >> 8;
+   device->tpc_count = value >> 8;
 
    nouveau_ws_device_set_dbg_flags(device);
 
index 410ab71..b16b3d3 100644 (file)
@@ -38,7 +38,7 @@ struct nouveau_ws_device {
    uint32_t local_mem_domain;
 
    uint8_t gpc_count;
-   uint16_t mp_count;
+   uint16_t tpc_count;
 
    enum nvk_debug debug_flags;