.cs_ibo = true,
.gfx_ibo = true,
.gfx_shared_const = true,
- .cs_bindless = 0x1f,
- .gfx_bindless = 0x1f,));
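+ /* One invalidate bit per bindless base: A6XX has 5 descriptor sets
+  * (0x1f), A7XX has 8 (0xff). */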
+ .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
+ .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,));
tu6_emit_xs_config<CHIP>(cs, MESA_SHADER_VERTEX, vs);
tu6_emit_xs_config<CHIP>(cs, MESA_SHADER_TESS_CTRL, NULL);
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_INVALIDATE);
if (flushes & TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE) {
tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
- .cs_bindless = 0x1f,
- .gfx_bindless = 0x1f,
+ .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
+ .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
));
}
if (flushes & TU_CMD_FLAG_WAIT_MEM_WRITES)
.gfx_ibo = true,
.cs_shared_const = true,
.gfx_shared_const = true,
- .cs_bindless = 0x1f,
- .gfx_bindless = 0x1f,));
+ .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
+ .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,));
tu_cs_emit_wfi(cs);
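+ /* Each set iova is a 64-bit address, so emit two dwords per bound set. */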
tu_cs_emit_array(cs, (const uint32_t*)descriptors_state->set_iova, 2 * descriptors_state->max_sets_bound);
}
- /* Dynamic descriptors get the last descriptor set. */
+ /* Dynamic descriptors get the reserved descriptor set. */
if (descriptors_state->dynamic_bound) {
- tu_cs_emit_pkt4(cs, sp_bindless_base_reg + 4 * 2, 2);
- tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
+ int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
+ assert(reserved_set_idx >= 0); /* reserved set must be bound */
+
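+ /* Each bindless base register pair holds a 64-bit address (two dwords),
+  * hence the reserved_set_idx * 2 register offset. */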
+ tu_cs_emit_pkt4(cs, sp_bindless_base_reg + reserved_set_idx * 2, 2);
+ tu_cs_emit_qw(cs, descriptors_state->set_iova[reserved_set_idx]);
if (CHIP == A6XX) {
- tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + 4 * 2, 2);
- tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
+ tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + reserved_set_idx * 2, 2);
+ tu_cs_emit_qw(cs, descriptors_state->set_iova[reserved_set_idx]);
}
}
tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
- .cs_bindless = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE ? 0x1f : 0,
- .gfx_bindless = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ? 0x1f : 0,
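+ /* Only invalidate bindless state for the bind point being updated. */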
+ .cs_bindless = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE ? (CHIP == A6XX ? 0x1f : 0xff) : 0,
+ .gfx_bindless = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ? (CHIP == A6XX ? 0x1f : 0xff) : 0,
));
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
if (layout->dynamic_offset_size) {
/* allocate and fill out dynamic descriptor set */
struct tu_cs_memory dynamic_desc_set;
+ int reserved_set_idx = cmd->device->physical_device->reserved_set_idx;
VkResult result = tu_cs_alloc(&cmd->sub_cs,
layout->dynamic_offset_size / (4 * A6XX_TEX_CONST_DWORDS),
A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
layout->dynamic_offset_size);
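+ /* BINDLESS_DESCRIPTOR_64B in the low bits of the iova selects the
+  * 64-byte descriptor size for this bindless base. */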
- descriptors_state->set_iova[MAX_SETS] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B;
+ assert(reserved_set_idx >= 0); /* reserved set must be bound */
+ descriptors_state->set_iova[reserved_set_idx] = dynamic_desc_set.iova | BINDLESS_DESCRIPTOR_64B;
descriptors_state->dynamic_bound = true;
}
struct tu_descriptor_set *sets[MAX_SETS];
struct tu_descriptor_set push_set;
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
- uint64_t set_iova[MAX_SETS + 1];
+ uint64_t set_iova[MAX_SETS];
uint32_t max_sets_bound;
bool dynamic_bound;
};
unsigned dynamic_offset_size = 0;
for (uint32_t set = 0; set < layout->num_sets; set++) {
- assert(set < MAX_SETS);
layout->set[set].dynamic_offset_start = dynamic_offset_size;
if (layout->set[set].layout)
TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout,
pCreateInfo->pSetLayouts[set]);
- assert(set < MAX_SETS);
+ assert(set < device->physical_device->usable_sets);
layout->set[set].layout = set_layout;
if (set_layout)
vk_descriptor_set_layout_ref(&set_layout->vk);
/* descriptorSetLayout should be ignored for push descriptors
* and instead it refers to pipelineLayout and set.
*/
- assert(pCreateInfo->set < MAX_SETS);
+ assert(pCreateInfo->set < device->physical_device->usable_sets);
set_layout = pipeline_layout->set[pCreateInfo->set].layout;
} else {
TU_FROM_HANDLE(tu_descriptor_set_layout, _set_layout,
#include "vk_descriptor_set_layout.h"
-/* The hardware supports 5 descriptor sets, but we reserve 1 for dynamic
- * descriptors and input attachments.
+/* The hardware supports up to 8 descriptor sets as of A7XX (A6XX supports 5).
+ * Note: this is the maximum across generations, not the maximum for any
+ * particular generation, so it should only be used to size allocations.
*/
-#define MAX_SETS 4
+#define MAX_SETS 8
/* I have no idea what the maximum size is, but the hardware supports very
* large numbers of descriptors (at least 2^16). This limit is based on
device->ccu_offset_bypass = depth_cache_size;
device->ccu_offset_gmem = device->gmem_size - color_cache_size;
+
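+ /* The last of the hardware's max_sets sets is reserved for dynamic
+  * descriptors, leaving max_sets - 1 sets usable by the application. */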
+ device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
break;
}
default:
.maxSamplerAllocationCount = 64 * 1024,
.bufferImageGranularity = 64, /* A cache line */
.sparseAddressSpaceSize = 0,
- .maxBoundDescriptorSets = MAX_SETS,
+ .maxBoundDescriptorSets = pdevice->usable_sets,
.maxPerStageDescriptorSamplers = max_descriptor_set_size,
.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
properties->bufferlessPushDescriptors = true;
properties->allowSamplerImageViewPostSubmitCreation = true;
properties->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
- properties->maxDescriptorBufferBindings = MAX_SETS;
- properties->maxResourceDescriptorBufferBindings = MAX_SETS;
- properties->maxSamplerDescriptorBufferBindings = MAX_SETS;
- properties->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
+ properties->maxDescriptorBufferBindings = pdevice->usable_sets;
+ properties->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
+ properties->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
+ properties->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
properties->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
properties->bufferCaptureReplayDescriptorDataSize = 0;
properties->imageCaptureReplayDescriptorDataSize = 0;
uint32_t ccu_offset_gmem;
uint32_t ccu_offset_bypass;
+ /* Number of usable descriptor sets; this excludes the reserved set */
+ uint32_t usable_sets;
+ /* Index of the reserved descriptor set; -1 if no set is reserved */
+ int32_t reserved_set_idx;
+
bool has_set_iova;
uint64_t va_start;
uint64_t va_size;
}
static void
-tu6_emit_load_state(struct tu_pipeline *pipeline,
+tu6_emit_load_state(struct tu_device *device,
+ struct tu_pipeline *pipeline,
struct tu_pipeline_layout *layout)
{
unsigned size = tu6_load_state_size(pipeline, layout);
continue;
switch (binding->type) {
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- base = MAX_SETS;
+ assert(device->physical_device->reserved_set_idx >= 0);
+ base = device->physical_device->reserved_set_idx;
offset = (layout->set[i].dynamic_offset_start +
binding->dynamic_offset_offset) / 4;
FALLTHROUGH;
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- base = MAX_SETS;
+ assert(device->physical_device->reserved_set_idx >= 0);
+ base = device->physical_device->reserved_set_idx;
offset = (layout->set[i].dynamic_offset_start +
binding->dynamic_offset_offset) / 4;
FALLTHROUGH;
const struct tu_shader *shader,
struct tu_pipeline_builder *builder)
{
+ const struct tu_physical_device *phys_dev = cs->device->physical_device;
if (!xs || shader->const_state.dynamic_offset_loc == UINT32_MAX)
return;
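+ /* Upload one dword of dynamic offset base per usable set, packed as
+  * DIV_ROUND_UP(usable_sets, 4) vec4 constants. */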
- tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 3 + MAX_SETS);
+ tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 3 + phys_dev->usable_sets);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(shader->const_state.dynamic_offset_loc / 4) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(xs->type)) |
- CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(MAX_SETS, 4)));
+ CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(phys_dev->usable_sets, 4)));
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < MAX_SETS; i++) {
+ for (unsigned i = 0; i < phys_dev->usable_sets; i++) {
unsigned dynamic_offset_start =
builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4);
tu_cs_emit(cs, i < builder->layout.num_sets ? dynamic_offset_start : 0);
struct tu_graphics_lib_pipeline *library = builder->libraries[i];
builder->layout.num_sets = MAX2(builder->layout.num_sets,
library->num_sets);
+ assert(builder->layout.num_sets <= builder->device->physical_device->usable_sets);
for (unsigned j = 0; j < library->num_sets; j++) {
- if (library->layouts[i])
- builder->layout.set[i].layout = library->layouts[i];
+ builder->layout.set[j].layout = library->layouts[j];
}
builder->layout.push_constant_size = library->push_constant_size;
/* The blob doesn't preload state on A7XX; likely preloading either
* doesn't work or doesn't provide a benefit.
*/
- tu6_emit_load_state(*pipeline, &builder->layout);
+ tu6_emit_load_state(builder->device, *pipeline, &builder->layout);
}
}
pipeline->local_size[i] = v->local_size[i];
if (CHIP == A6XX) {
- tu6_emit_load_state(&pipeline->base, layout);
+ tu6_emit_load_state(dev, &pipeline->base, layout);
}
tu_append_executable(&pipeline->base, v, nir_initial_disasm);
}
static void
-lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
+lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
+ nir_intrinsic_instr *instr,
struct tu_shader *shader,
const struct tu_pipeline_layout *layout)
{
base = nir_imm_int(b, (layout->set[set].dynamic_offset_start +
binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
}
- set = MAX_SETS;
+ assert(dev->physical_device->reserved_set_idx >= 0);
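+ /* Dynamic UBO/SSBO descriptors are gathered into the reserved set. */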
+ set = dev->physical_device->reserved_set_idx;
break;
default:
base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
descriptor_idx = nir_iadd_imm(b, descriptor_idx, 1);
}
- nir_def *results[MAX_SETS + 1] = { NULL };
+ nir_def *results[MAX_SETS] = { NULL };
if (nir_scalar_is_const(scalar_idx)) {
nir_def *bindless =
}
nir_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
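+ /* The set index isn't constant: emit an if-ladder comparing it against
+  * each possible set and merge the per-set results with phis below. */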
- for (unsigned i = 0; i < MAX_SETS + 1; i++) {
+ for (unsigned i = 0; i < dev->physical_device->info->a6xx.max_sets; i++) {
/* if (base_idx == i) { ... */
nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));
nir_def *result =
nir_undef(b, intrin->def.num_components, intrin->def.bit_size);
- for (int i = MAX_SETS; i >= 0; i--) {
+ for (int i = dev->physical_device->info->a6xx.max_sets - 1; i >= 0; i--) {
nir_pop_if(b, NULL);
if (info->has_dest)
result = nir_if_phi(b, results[i], result);
return true;
case nir_intrinsic_vulkan_resource_index:
- lower_vulkan_resource_index(b, instr, shader, layout);
+ lower_vulkan_resource_index(dev, b, instr, shader, layout);
return true;
case nir_intrinsic_vulkan_resource_reindex:
lower_vulkan_resource_reindex(b, instr);
if (layout->independent_sets) {
const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
- reserved_consts_vec4 += DIV_ROUND_UP(MAX_SETS, 4);
+ assert(dev->physical_device->reserved_set_idx >= 0);
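+ /* reserved_set_idx == usable_sets (both are max_sets - 1), so this
+  * reserves one dword of dynamic offset per usable set, matching the
+  * per-set CP_LOAD_STATE6 upload in tu6_emit_dynamic_offset(). */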
+ reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4);
} else {
const_state->dynamic_offset_loc = UINT32_MAX;
}