struct nouveau_ws_bo *
nvk_slm_area_get_bo_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
- uint32_t *bytes_per_mp_out)
+ uint32_t *bytes_per_tpc_out)
{
simple_mtx_lock(&area->mutex);
struct nouveau_ws_bo *bo = area->bo;
if (bo)
nouveau_ws_bo_ref(bo);
*bytes_per_warp_out = area->bytes_per_warp;
- *bytes_per_mp_out = area->bytes_per_mp;
+ *bytes_per_tpc_out = area->bytes_per_tpc;
simple_mtx_unlock(&area->mutex);
return bo;
}
bytes_per_warp = ALIGN(bytes_per_warp, 0x200);
- uint64_t bytes_per_mp = bytes_per_warp * 64; /* max warps */
+ uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps */
/* The hardware seems to require this alignment for
* NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A_SIZE_LOWER.
*
* Fortunately, this is just the alignment for bytes_per_warp multiplied
* by the number of warps, 64. It might matter for real on a GPU with 48
* warps but we don't support any of those yet.
*/
- assert(bytes_per_mp == ALIGN(bytes_per_mp, 0x8000));
+ assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
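
For reference, the 0x8000 requirement follows arithmetically from the lines above: bytes_per_warp is first aligned to 0x200 and then multiplied by the 64-warp maximum, and 0x200 * 64 = 0x8000. A minimal standalone sketch of that check (the starting per-warp value is made up, not taken from the driver):

#include <assert.h>
#include <stdint.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~(uint64_t)((a) - 1))

int main(void)
{
   uint64_t bytes_per_warp = ALIGN(4321, 0x200); /* hypothetical per-warp SLM */
   uint64_t bytes_per_tpc = bytes_per_warp * 64; /* max warps per TPC */

   /* 0x200 * 64 == 0x8000, so the product is always 0x8000-aligned. */
   assert(bytes_per_tpc == ALIGN(bytes_per_tpc, 0x8000));
   return 0;
}
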
- /* nvk_slm_area::bytes_per_mp only ever increases so we can check this
+ /* nvk_slm_area::bytes_per_tpc only ever increases so we can check this
* outside the lock and exit early in the common case. We only need to
* take the lock if we're actually going to resize.
*
- * Also, we only care about bytes_per_mp and not bytes_per_warp because
+ * Also, we only care about bytes_per_tpc and not bytes_per_warp because
* they are integer multiples of each other.
*/
- if (likely(bytes_per_mp <= area->bytes_per_mp))
+ if (likely(bytes_per_tpc <= area->bytes_per_tpc))
return VK_SUCCESS;
- uint64_t size = bytes_per_mp * dev->ws_dev->mp_count;
+ uint64_t size = bytes_per_tpc * dev->ws_dev->tpc_count;
/* The hardware seems to require this alignment for
* NV9097_SET_SHADER_LOCAL_MEMORY_D_SIZE_LOWER.
*/
struct nouveau_ws_bo *unref_bo;
simple_mtx_lock(&area->mutex);
- if (bytes_per_mp <= area->bytes_per_mp) {
+ if (bytes_per_tpc <= area->bytes_per_tpc) {
/* We lost the race, throw away our BO */
assert(area->bytes_per_warp == bytes_per_warp);
unref_bo = bo;
} else {
unref_bo = area->bo;
area->bo = bo;
area->bytes_per_warp = bytes_per_warp;
- area->bytes_per_mp = bytes_per_mp;
+ area->bytes_per_tpc = bytes_per_tpc;
}
simple_mtx_unlock(&area->mutex);
struct nvk_slm_area {
simple_mtx_t mutex;
struct nouveau_ws_bo *bo;
uint32_t bytes_per_warp;
- uint32_t bytes_per_mp;
+ uint32_t bytes_per_tpc;
};
struct nouveau_ws_bo *
nvk_slm_area_get_bo_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
- uint32_t *bytes_per_mp_out);
+ uint32_t *bytes_per_tpc_out);
struct nvk_queue_state *qs)
{
struct nouveau_ws_bo *bo;
- uint32_t alloc_count, bytes_per_warp, bytes_per_mp;
+ uint32_t alloc_count, bytes_per_warp, bytes_per_tpc;
bool dirty = false;
bo = nvk_descriptor_table_get_bo_ref(&dev->images, &alloc_count);
}
}
- bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_mp);
+ bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
- qs->slm.bytes_per_mp != bytes_per_mp) {
+ qs->slm.bytes_per_tpc != bytes_per_tpc) {
if (qs->slm.bo)
nouveau_ws_bo_destroy(qs->slm.bo);
qs->slm.bo = bo;
qs->slm.bytes_per_warp = bytes_per_warp;
- qs->slm.bytes_per_mp = bytes_per_mp;
+ qs->slm.bytes_per_tpc = bytes_per_tpc;
dirty = true;
} else {
/* No change */
const uint64_t slm_addr = qs->slm.bo->offset;
const uint64_t slm_size = qs->slm.bo->size;
const uint64_t slm_per_warp = qs->slm.bytes_per_warp;
- const uint64_t slm_per_mp = qs->slm.bytes_per_mp;
- assert(!(slm_per_mp & 0x7fff));
+ const uint64_t slm_per_tpc = qs->slm.bytes_per_tpc;
+ assert(!(slm_per_tpc & 0x7fff));
/* Compute */
P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_A);
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_A(p, slm_addr >> 32);
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_B(p, slm_addr);
P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A);
- P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(p, slm_per_mp >> 32);
- P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(p, slm_per_mp);
+ P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(p, slm_per_tpc >> 32);
+ P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(p, slm_per_tpc);
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_C(p, 0xff);
if (dev->pdev->info.cls_compute < VOLTA_COMPUTE_A) {
P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_THROTTLED_A);
- P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(p, slm_per_mp >> 32);
- P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(p, slm_per_mp);
+ P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(p, slm_per_tpc >> 32);
+ P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(p, slm_per_tpc);
P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_C(p, 0xff);
}
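
The _A/_B method pairs above take one 64-bit value split into high and low 32-bit words (the SLM base address for SET_SHADER_LOCAL_MEMORY, the per-TPC byte count for the NON_THROTTLED and, pre-Volta, THROTTLED variants). A small sketch of that split outside the P_NVA0C0 helpers, with a made-up size:

#include <stdint.h>
#include <stdio.h>

struct dword_pair {
   uint32_t hi; /* written via the ..._A method */
   uint32_t lo; /* written via the ..._B method */
};

/* Mirrors the "value >> 32" / "value" argument pair used above. */
static struct dword_pair
split64(uint64_t value)
{
   return (struct dword_pair){
      .hi = (uint32_t)(value >> 32),
      .lo = (uint32_t)value,
   };
}

int main(void)
{
   struct dword_pair size = split64(0x48000); /* hypothetical per-TPC size */
   printf("hi=0x%x lo=0x%x\n", size.hi, size.lo);
   return 0;
}
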
struct {
struct nouveau_ws_bo *bo;
uint32_t bytes_per_warp;
- uint32_t bytes_per_mp;
+ uint32_t bytes_per_tpc;
} slm;
struct {
if (nouveau_ws_param(fd, NOUVEAU_GETPARAM_GRAPH_UNITS, &value))
goto out_err;
device->gpc_count = value & 0x000000ff;
- device->mp_count = value >> 8;
+ device->tpc_count = value >> 8;
nouveau_ws_device_set_dbg_flags(device);
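
For clarity on the decode above: the two assignments read the GPC count from the low byte of the GRAPH_UNITS value and the TPC count from the bits above it. A standalone sketch of the same unpacking with a made-up value (not from real hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t value = 0x00002407;             /* hypothetical GRAPH_UNITS result */
   uint8_t gpc_count = value & 0x000000ff;  /* low byte: 0x07 = 7 GPCs */
   uint16_t tpc_count = value >> 8;         /* upper bits: 0x24 = 36 TPCs */
   printf("gpc_count=%u tpc_count=%u\n", (unsigned)gpc_count, (unsigned)tpc_count);
   return 0;
}
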
uint32_t local_mem_domain;
uint8_t gpc_count;
- uint16_t mp_count;
+ uint16_t tpc_count;
enum nvk_debug debug_flags;