sba.IndirectObjectMOCS = mocs;
sba.InstructionMOCS = mocs;
sba.SurfaceStateMOCS = mocs;
+#if GFX_VER >= 9
+ sba.BindlessSurfaceStateMOCS = mocs;
+#endif
sba.GeneralStateBaseAddressModifyEnable = true;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.SurfaceStateBaseAddressModifyEnable = true;
-#if GFX_VER >= 9
- sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SCRATCH_START);
- sba.BindlessSurfaceStateSize = (IRIS_SCRATCH_ZONE_SIZE >> 12) - 1;
- sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
- sba.BindlessSurfaceStateMOCS = mocs;
-#endif
#if GFX_VER >= 11
sba.BindlessSamplerStateMOCS = mocs;
#endif
#define GFX8_BTI_STATELESS_NON_COHERENT 253
#define GFX9_BTI_BINDLESS 252
+/* This ID doesn't map to any HW-related value. It exists to inform the
+ * lowering code not to use the bindless heap.
+ */
+#define GFX125_NON_BINDLESS (1u << 16)
+
/* Dataport atomic operations for Untyped Atomic Integer Operation message
* (and others).
*/
fs_reg handle = component(ubld.vgrf(BRW_REGISTER_TYPE_UD), 0);
ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(~0x3ffu));
+ srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX125_NON_BINDLESS);
srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = handle;
} else if (devinfo->ver >= 8) {
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
fs_reg handle = component(ubld.vgrf(BRW_REGISTER_TYPE_UD), 0);
ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(~0x3ffu));
+ srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX125_NON_BINDLESS);
srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = handle;
} else if (devinfo->ver >= 8) {
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
unspill_inst->sfid = GFX12_SFID_UGM;
unspill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
unspill_inst->exec_size,
- LSC_ADDR_SURFTYPE_BSS,
+ LSC_ADDR_SURFTYPE_SS,
LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32,
spill_inst->sfid = GFX12_SFID_UGM;
spill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE,
bld.dispatch_width(),
- LSC_ADDR_SURFTYPE_BSS,
+ LSC_ADDR_SURFTYPE_SS,
LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32,
else
inst->sfid = GFX12_SFID_UGM;
- /* We must have exactly one of surface and surface_handle */
- assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
+ /* We should have exactly one of surface and surface_handle. For scratch
+ * messages generated by brw_fs_nir.cpp we also allow a special value to
+ * know what heap base we should use in STATE_BASE_ADDRESS (SS = Surface
+ * State Offset, or BSS = Bindless Surface State Offset).
+ */
+ bool non_bindless = surface.file == IMM && surface.ud == GFX125_NON_BINDLESS;
+ assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE) ||
+ (non_bindless && surface_handle.file != BAD_FILE));
enum lsc_addr_surface_type surf_type;
- if (surface_handle.file != BAD_FILE)
- surf_type = LSC_ADDR_SURFTYPE_BSS;
- else if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
+ if (surface_handle.file != BAD_FILE) {
+ assert(surface.file == IMM && (surface.ud == 0 || surface.ud == GFX125_NON_BINDLESS));
+ surf_type = non_bindless ? LSC_ADDR_SURFTYPE_SS : LSC_ADDR_SURFTYPE_BSS;
+ } else if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
surf_type = LSC_ADDR_SURFTYPE_FLAT;
else
surf_type = LSC_ADDR_SURFTYPE_BTI;
case LSC_ADDR_SURFTYPE_FLAT:
inst->src[1] = brw_imm_ud(0);
break;
+ case LSC_ADDR_SURFTYPE_SS:
case LSC_ADDR_SURFTYPE_BSS:
/* We assume that the driver provided the handle in the top 20 bits so
* we can use the surface handle directly as the extended descriptor.
for (unsigned i = 0; i < 16; i++) {
if (pool->surf_states[i].map != NULL) {
- anv_state_pool_free(&device->internal_surface_state_pool,
+ anv_state_pool_free(&device->scratch_surface_state_pool,
pool->surf_states[i]);
}
}
struct anv_scratch_pool *pool,
unsigned per_thread_scratch)
{
+ assert(device->info->verx10 >= 125);
+
if (per_thread_scratch == 0)
return 0;
struct anv_address addr = { .bo = bo };
struct anv_state state =
- anv_state_pool_alloc(&device->internal_surface_state_pool,
+ anv_state_pool_alloc(&device->scratch_surface_state_pool,
device->isl_dev.ss.size, 64);
isl_buffer_fill_state(&device->isl_dev, state.map,
uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
0, state.offset);
if (current) {
- anv_state_pool_free(&device->internal_surface_state_pool, state);
+ anv_state_pool_free(&device->scratch_surface_state_pool, state);
return current;
} else {
pool->surf_states[scratch_size_log2] = state;
}
/* Add all the global BOs to the object list for softpin case. */
- result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
+ result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
if (result != VK_SUCCESS)
return result;
if (result != VK_SUCCESS)
return result;
+ result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
+ if (result != VK_SUCCESS)
+ return result;
+
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
if (result != VK_SUCCESS)
return result;
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
return ret_bo;
- if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
+ if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
return ret_bo;
+ if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
+ return ret_bo;
if (!device->cmd_buffer_being_decoded)
return (struct intel_batch_decode_bo) { };
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
- result = anv_state_pool_init(&device->internal_surface_state_pool, device,
- "internal surface state pool",
- INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
+ if (device->info->verx10 >= 125) {
+ /* Put the scratch surface states at the beginning of the internal
+ * surface state pool.
+ */
+ result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
+ "scratch surface state pool",
+ SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
+ if (result != VK_SUCCESS)
+ goto fail_instruction_state_pool;
+
+ result = anv_state_pool_init(&device->internal_surface_state_pool, device,
+ "internal surface state pool",
+ INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
+ SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
+ } else {
+ result = anv_state_pool_init(&device->internal_surface_state_pool, device,
+ "internal surface state pool",
+ INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
+ }
if (result != VK_SUCCESS)
- goto fail_instruction_state_pool;
+ goto fail_scratch_surface_state_pool;
result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
"bindless surface state pool",
* to zero and they have a valid descriptor.
*/
device->null_surface_state =
- anv_state_pool_alloc(&device->internal_surface_state_pool,
+ anv_state_pool_alloc(device->info->verx10 >= 125 ?
+ &device->scratch_surface_state_pool :
+ &device->internal_surface_state_pool,
device->isl_dev.ss.size,
device->isl_dev.ss.align);
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
anv_state_pool_finish(&device->bindless_surface_state_pool);
fail_internal_surface_state_pool:
anv_state_pool_finish(&device->internal_surface_state_pool);
+ fail_scratch_surface_state_pool:
+ if (device->info->verx10 >= 125)
+ anv_state_pool_finish(&device->scratch_surface_state_pool);
fail_instruction_state_pool:
anv_state_pool_finish(&device->instruction_state_pool);
fail_dynamic_state_pool:
}
anv_state_pool_finish(&device->binding_table_pool);
+ if (device->info->verx10 >= 125)
+ anv_state_pool_finish(&device->scratch_surface_state_pool);
anv_state_pool_finish(&device->internal_surface_state_pool);
anv_state_pool_finish(&device->bindless_surface_state_pool);
anv_state_pool_finish(&device->instruction_state_pool);
.h = image->vk.extent.height,
.d = image->vk.extent.depth,
});
+
+ iview->planes[vplane].lowered_surface_state_is_null = true;
}
}
}
#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
-#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
-#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
+#define SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB (8MiB overlaps surface state pool) */
+#define SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001407fffffULL
+#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB (64MiB) */
+#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001c3ffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */
#define BINDING_TABLE_POOL_SIZE \
(BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
-#define INTERNAL_SURFACE_STATE_POOL_SIZE \
- (INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
+#define SCRATCH_SURFACE_STATE_POOL_SIZE \
+ (SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define BINDLESS_SURFACE_STATE_POOL_SIZE \
(BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
+#define INTERNAL_SURFACE_STATE_POOL_SIZE \
+ (INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
#define CLIENT_VISIBLE_HEAP_SIZE \
struct anv_state_pool dynamic_state_pool;
struct anv_state_pool instruction_state_pool;
struct anv_state_pool binding_table_pool;
+ struct anv_state_pool scratch_surface_state_pool;
struct anv_state_pool internal_surface_state_pool;
struct anv_state_pool bindless_surface_state_pool;
*/
struct anv_surface_state storage_surface_state;
struct anv_surface_state lowered_storage_surface_state;
+
+ bool lowered_surface_state_is_null;
} planes[3];
};
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
uint32_t offset)
{
+#if GFX_VERX10 >= 125
+ return SCRATCH_SURFACE_STATE_POOL_SIZE + offset;
+#else
return offset;
+#endif
}
static void *
continue;
}
const struct anv_descriptor *desc = &set->descriptors[binding->index];
-
+ /* Relative offset in the STATE_BASE_ADDRESS::SurfaceStateBaseAddress
+ * heap. Depending on where the descriptor surface state is
+ * allocated, it can come from either
+ * device->internal_surface_state_pool or
+ * device->bindless_surface_state_pool.
+ */
switch (desc->type) {
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
case VK_DESCRIPTOR_TYPE_SAMPLER:
binding->lowered_storage_surface
? desc->image_view->planes[binding->plane].lowered_storage_surface_state
: desc->image_view->planes[binding->plane].storage_surface_state;
- surface_state =
- anv_bindless_state_for_binding_table(sstate.state);
+ const bool lowered_surface_state_is_null =
+ desc->image_view->planes[binding->plane].lowered_surface_state_is_null;
+ surface_state = anv_bindless_state_for_binding_table(sstate.state);
assert(surface_state.alloc_size);
- if (surface_state.offset == 0) {
+ if (binding->lowered_storage_surface && lowered_surface_state_is_null) {
mesa_loge("Bound a image to a descriptor where the "
"descriptor does not have NonReadable "
"set and the image does not have a "
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), btp) {
btp._3DCommandSubOpcode = binding_table_opcodes[s];
+#if GFX_VERX10 >= 125
+ btp.PointertoVSBindingTable = SCRATCH_SURFACE_STATE_POOL_SIZE +
+ cmd_buffer->state.binding_tables[s].offset;
+#else
btp.PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset;
+#endif
}
}
}