VK_EXT_4444_formats DONE (anv, lvp, radv, tu, v3dv)
VK_EXT_extended_dynamic_state DONE (anv, lvp, radv, tu)
VK_EXT_extended_dynamic_state2 DONE (anv, lvp, radv, tu)
- VK_EXT_inline_uniform_block DONE (anv, radv)
+ VK_EXT_inline_uniform_block DONE (anv, radv, v3dv)
VK_EXT_pipeline_creation_cache_control DONE (anv, radv, v3dv)
VK_EXT_pipeline_creation_feedback DONE (anv, radv, v3dv)
VK_EXT_private_data DONE (anv, lvp, radv, tu, v3dv)
/* Sub-pixel precision bits in the rasterizer */
#define V3D_COORD_SHIFT 6
+/* Size of a cache line */
+#define V3D_NON_COHERENT_ATOM_SIZE 256
+
#endif /* V3D_LIMITS_H */
vir_MOV(c, color_reads_for_sample[component]));
}
+/* Tries to load 'num_components' dwords of uniform data starting at byte
+ * 'offset' into 'dest' using ldunif. Returns false without emitting
+ * anything if the offset is not 32-bit aligned, in which case the caller
+ * is expected to fall back to a general TMU load.
+ */
+static bool
+try_emit_uniform(struct v3d_compile *c,
+ int offset,
+ int num_components,
+ nir_dest *dest,
+ enum quniform_contents contents)
+{
+ /* Even though ldunif is strictly 32-bit we can still use it
+ * to load scalar 8-bit/16-bit uniforms so long as their offset
+ * is 32-bit aligned. In this case, ldunif would still load
+ * 32-bit into the destination with the 8-bit/16-bit uniform
+ * data in the LSB and garbage in the MSB, but that is fine
+ * because we should only be accessing the valid bits of the
+ * destination.
+ *
+ * FIXME: if in the future we improve our register allocator to
+ * pack 2 16-bit variables in the MSB and LSB of the same
+ * register then this optimization would not be valid as is,
+ * since the load clobbers the MSB.
+ */
+ if (offset % 4 != 0)
+ return false;
+
+ /* We need dwords */
+ offset = offset / 4;
+
+ for (int i = 0; i < num_components; i++) {
+ ntq_store_dest(c, dest, i,
+ vir_uniform(c, contents, offset + i));
+ }
+
+ return true;
+}
+
static void
ntq_emit_load_uniform(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
+ /* We scalarize general TMU access for anything that is not 32-bit. */
+ assert(nir_dest_bit_size(instr->dest) == 32 ||
+ instr->num_components == 1);
+
+ /* Try to emit ldunif if possible, otherwise fallback to general TMU */
if (nir_src_is_const(instr->src[0])) {
int offset = (nir_intrinsic_base(instr) +
nir_src_as_uint(instr->src[0]));
- /* Even though ldunif is strictly 32-bit we can still use it
- * to load scalar 8-bit/16-bit uniforms so long as their offset
- * is * 32-bit aligned. In this case, ldunif would still load
- * 32-bit into the destination with the 8-bit/16-bit uniform
- * data in the LSB and garbage in the MSB, but that is fine
- * because we should only be accessing the valid bits of the
- * destination.
- *
- * FIXME: if in the future we improve our register allocator to
- * pack 2 16-bit variables in the MSB and LSB of the same
- * register then this optimization would not be valid as is,
- * since the load clobbers the MSB.
- */
- if (offset % 4 == 0) {
- /* We need dwords */
- offset = offset / 4;
-
- /* We scalarize general TMU access for anything that
- * is not 32-bit.
- */
- assert(nir_dest_bit_size(instr->dest) == 32 ||
- instr->num_components == 1);
-
- for (int i = 0; i < instr->num_components; i++) {
- ntq_store_dest(c, &instr->dest, i,
- vir_uniform(c, QUNIFORM_UNIFORM,
- offset + i));
- }
+ if (try_emit_uniform(c, offset, instr->num_components,
+ &instr->dest, QUNIFORM_UNIFORM)) {
return;
}
}
ntq_emit_tmu_general(c, instr, false);
}
+/* Tries to emit a load from an inline uniform buffer as a ldunif sequence,
+ * handling it like push constants. Returns false if the source is not an
+ * inline UBO (index 0 is push constants, indices above
+ * max_inline_uniform_buffers are regular UBOs) or if the constant-offset
+ * ldunif path is not applicable, in which case the caller should emit a
+ * regular UBO load.
+ */
+static bool
+ntq_emit_inline_ubo_load(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ if (c->compiler->max_inline_uniform_buffers <= 0)
+ return false;
+
+ /* On Vulkan we use indices 1..MAX_INLINE_UNIFORM_BUFFERS for inline
+ * uniform buffers which we want to handle more like push constants
+ * than regular UBO. OpenGL doesn't implement this feature.
+ */
+ assert(c->key->environment == V3D_ENVIRONMENT_VULKAN);
+ uint32_t index = nir_src_as_uint(instr->src[0]);
+ if (index == 0 || index > c->compiler->max_inline_uniform_buffers)
+ return false;
+
+ /* We scalarize general TMU access for anything that is not 32-bit */
+ assert(nir_dest_bit_size(instr->dest) == 32 ||
+ instr->num_components == 1);
+
+ if (nir_src_is_const(instr->src[1])) {
+ /* Index 0 is reserved for push constants */
+ assert(index > 0);
+ uint32_t inline_index = index - 1;
+ int offset = nir_src_as_uint(instr->src[1]);
+ if (try_emit_uniform(c, offset, instr->num_components,
+ &instr->dest,
+ QUNIFORM_INLINE_UBO_0 + inline_index)) {
+ return true;
+ }
+ }
+
+ /* Fallback to regular UBO load */
+ return false;
+}
+
static void
ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
break;
case nir_intrinsic_load_ubo:
+ if (ntq_emit_inline_ubo_load(c, instr))
+ break;
+ FALLTHROUGH;
case nir_intrinsic_load_ssbo:
if (!ntq_emit_load_unifa(c, instr)) {
ntq_emit_tmu_general(c, instr, false);
* Current value of gl_ViewIndex for Multiview rendering.
*/
QUNIFORM_VIEW_INDEX,
+
+ /**
+ * Inline uniform buffers
+ */
+ QUNIFORM_INLINE_UBO_0,
+ QUNIFORM_INLINE_UBO_1,
+ QUNIFORM_INLINE_UBO_2,
+ QUNIFORM_INLINE_UBO_3,
};
static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
*/
struct v3d_compiler {
const struct v3d_device_info *devinfo;
+ uint32_t max_inline_uniform_buffers;
struct ra_regs *regs;
struct ra_class *reg_class_any[3];
struct ra_class *reg_class_r5[3];
return inst->uniform != ~0;
}
-const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
+const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo,
+ uint32_t max_inline_uniform_buffers);
void v3d_compiler_free(const struct v3d_compiler *compiler);
void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
}
const struct v3d_compiler *
-v3d_compiler_init(const struct v3d_device_info *devinfo)
+v3d_compiler_init(const struct v3d_device_info *devinfo,
+ uint32_t max_inline_uniform_buffers)
{
struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
if (!compiler)
return NULL;
compiler->devinfo = devinfo;
+ compiler->max_inline_uniform_buffers = max_inline_uniform_buffers;
if (!vir_init_reg_sets(compiler)) {
ralloc_free(compiler);
* binding layout, and array_index, it returns the map region assigned to it
* from the descriptor pool bo.
*/
-static void*
+static void *
descriptor_bo_map(struct v3dv_device *device,
struct v3dv_descriptor_set *set,
const struct v3dv_descriptor_set_binding_layout *binding_layout,
uint32_t array_index)
{
- assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0);
+ /* Inline uniform blocks use BO memory to store UBO contents, not
+ * descriptor data, so their descriptor BO size is 0 even though they
+ * do use BO memory.
+ */
+ uint32_t bo_size = v3dv_X(device, descriptor_bo_size)(binding_layout->type);
+ assert(bo_size > 0 ||
+ binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
+
return set->pool->bo->map +
set->base_offset + binding_layout->descriptor_offset +
- array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type);
+ array_index * bo_size;
}
static bool
* It also returns the descriptor type, so the caller could do extra
* validation or adding extra offsets if the bo contains more than one field.
*/
-static struct v3dv_cl_reloc
+struct v3dv_cl_reloc
v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
const struct v3dv_descriptor_set_binding_layout *binding_layout =
&set->layout->binding[binding_number];
- assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0);
- *out_type = binding_layout->type;
+ assert(binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ||
+ v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0);
+ if (out_type)
+ *out_type = binding_layout->type;
uint32_t array_index = map->array_index[index];
assert(array_index < binding_layout->array_size);
uint32_t bo_size = 0;
uint32_t descriptor_count = 0;
+ const VkDescriptorPoolInlineUniformBlockCreateInfo *inline_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO);
+
assert(pCreateInfo->poolSizeCount > 0);
for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
/* Verify supported descriptor type */
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
break;
default:
unreachable("Unimplemented descriptor type");
}
assert(pCreateInfo->pPoolSizes[i].descriptorCount > 0);
- descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
- bo_size += v3dv_X(device, descriptor_bo_size)(pCreateInfo->pPoolSizes[i].type) *
- pCreateInfo->pPoolSizes[i].descriptorCount;
+ if (pCreateInfo->pPoolSizes[i].type ==
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ /* Inline uniform blocks are specified to use the descriptor array
+ * size as the size in bytes of the block.
+ */
+ assert(inline_info);
+ descriptor_count++;
+ bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+ } else {
+ descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+ bo_size += v3dv_X(device, descriptor_bo_size)(pCreateInfo->pPoolSizes[i].type) *
+ pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+ }
+
+ /* We align all our buffers to V3D_NON_COHERENT_ATOM_SIZE, make sure we
+ * allocate enough memory to honor that requirement for all our inline
+ * buffers too.
+ */
+ if (inline_info) {
+ bo_size += V3D_NON_COHERENT_ATOM_SIZE *
+ inline_info->maxInlineUniformBlockBindings;
}
if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
/* Nothing here, just to keep the descriptor type filtering below */
break;
default:
samplers_offset += sizeof(struct v3dv_sampler) * binding->descriptorCount;
}
- descriptor_count += binding->descriptorCount;
- dynamic_offset_count += binding->descriptorCount *
- set_layout->binding[binding_number].dynamic_offset_count;
-
set_layout->shader_stages |= binding->stageFlags;
- set_layout->binding[binding_number].descriptor_offset = set_layout->bo_size;
- set_layout->bo_size +=
- v3dv_X(device, descriptor_bo_size)(set_layout->binding[binding_number].type) *
- binding->descriptorCount;
+ if (binding->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ dynamic_offset_count += binding->descriptorCount *
+ set_layout->binding[binding_number].dynamic_offset_count;
+
+ descriptor_count += binding->descriptorCount;
+
+ set_layout->binding[binding_number].descriptor_offset =
+ set_layout->bo_size;
+ set_layout->bo_size +=
+ v3dv_X(device, descriptor_bo_size)(set_layout->binding[binding_number].type) *
+ binding->descriptorCount;
+ } else {
+ /* We align all our buffers, inline buffers too. We made sure to take
+ * this into account when calculating total BO size requirements at pool
+ * creation time.
+ */
+ set_layout->bo_size = align(set_layout->bo_size,
+ V3D_NON_COHERENT_ATOM_SIZE);
+
+ set_layout->binding[binding_number].descriptor_offset =
+ set_layout->bo_size;
+
+ /* Inline uniform blocks are not arrayed, instead descriptorCount
+ * specifies the size of the buffer in bytes.
+ */
+ set_layout->bo_size += binding->descriptorCount;
+ descriptor_count++;
+ }
}
free(bindings);
sizeof(bview->texture_shader_state));
}
+/* Copies 'size' bytes of inline uniform block data from 'data' into the
+ * descriptor pool BO memory backing this binding, at byte 'offset' from
+ * the start of the inline buffer, and updates the descriptor's range to
+ * cover the written region.
+ */
+static void
+write_inline_uniform_descriptor(struct v3dv_device *device,
+ struct v3dv_descriptor *descriptor,
+ struct v3dv_descriptor_set *set,
+ const struct v3dv_descriptor_set_binding_layout *binding_layout,
+ const void *data,
+ size_t offset,
+ size_t size)
+{
+ assert(binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
+ descriptor->type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT;
+ descriptor->buffer = NULL;
+
+ void *desc_map = descriptor_bo_map(device, set, binding_layout, 0);
+ memcpy(desc_map + offset, data, size);
+
+ /* Inline uniform buffers allocate BO space in the pool for all inline
+ * buffers it may allocate and then this space is assigned to individual
+ * descriptors when they are written, so we define the range of an inline
+ * buffer as the largest range of data that the client has written to it.
+ */
+ descriptor->offset = 0;
+ descriptor->range = MAX2(descriptor->range, offset + size);
+}
+
VKAPI_ATTR void VKAPI_CALL
v3dv_UpdateDescriptorSets(VkDevice _device,
uint32_t descriptorWriteCount,
struct v3dv_descriptor *descriptor = set->descriptors;
descriptor += binding_layout->descriptor_index;
- descriptor += writeset->dstArrayElement;
- for (uint32_t j = 0; j < writeset->descriptorCount; ++j) {
+ /* Inline uniform blocks are not arrayed, instead they use dstArrayElement
+ * to specify the byte offset of the uniform update and descriptorCount
+ * to specify the size (in bytes) of the update.
+ */
+ uint32_t descriptor_count;
+ if (writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ descriptor += writeset->dstArrayElement;
+ descriptor_count = writeset->descriptorCount;
+ } else {
+ descriptor_count = 1;
+ }
+
+ for (uint32_t j = 0; j < descriptor_count; ++j) {
switch(writeset->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
writeset->dstArrayElement + j);
break;
}
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+ const VkWriteDescriptorSetInlineUniformBlock *inline_write =
+ vk_find_struct_const(writeset->pNext,
+ WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
+ assert(inline_write->dataSize == writeset->descriptorCount);
+ write_inline_uniform_descriptor(device, descriptor, set,
+ binding_layout,
+ inline_write->pData,
+ writeset->dstArrayElement, /* offset */
+ inline_write->dataSize);
+ break;
+ }
default:
unreachable("unimplemented descriptor type");
break;
struct v3dv_descriptor *dst_descriptor = dst_set->descriptors;
src_descriptor += src_binding_layout->descriptor_index;
- src_descriptor += copyset->srcArrayElement;
-
dst_descriptor += dst_binding_layout->descriptor_index;
+
+ if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
+ /* {src,dst}ArrayElement specifies src/dst start offset and
+ * descriptorCount specifies size (in bytes) to copy.
+ */
+ const void *src_data = src_set->pool->bo->map +
+ src_set->base_offset +
+ src_binding_layout->descriptor_offset +
+ copyset->srcArrayElement;
+ write_inline_uniform_descriptor(device, dst_descriptor, dst_set,
+ dst_binding_layout,
+ src_data,
+ copyset->dstArrayElement,
+ copyset->descriptorCount);
+ continue;
+ }
+
+ src_descriptor += copyset->srcArrayElement;
dst_descriptor += copyset->dstArrayElement;
for (uint32_t j = 0; j < copyset->descriptorCount; j++) {
struct v3dv_descriptor *descriptor =
set->descriptors +
- binding_layout->descriptor_index +
- entry->array_element;
+ binding_layout->descriptor_index;
switch (entry->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
for (uint32_t j = 0; j < entry->array_count; j++) {
const VkDescriptorBufferInfo *info =
pData + entry->offset + j * entry->stride;
- write_buffer_descriptor(descriptor + j, entry->type, info);
+ write_buffer_descriptor(descriptor + entry->array_element + j,
+ entry->type, info);
}
break;
pData + entry->offset + j * entry->stride;
V3DV_FROM_HANDLE(v3dv_image_view, iview, info->imageView);
V3DV_FROM_HANDLE(v3dv_sampler, sampler, info->sampler);
- write_image_descriptor(device, descriptor + j, entry->type,
- set, binding_layout, iview, sampler,
- entry->array_element + j);
+ write_image_descriptor(device, descriptor + entry->array_element + j,
+ entry->type, set, binding_layout, iview,
+ sampler, entry->array_element + j);
}
break;
const VkBufferView *_bview =
pData + entry->offset + j * entry->stride;
V3DV_FROM_HANDLE(v3dv_buffer_view, bview, *_bview);
- write_buffer_view_descriptor(device, descriptor + j, entry->type,
- set, binding_layout, bview,
+ write_buffer_view_descriptor(device,
+ descriptor + entry->array_element + j,
+ entry->type, set, binding_layout, bview,
entry->array_element + j);
}
break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+ write_inline_uniform_descriptor(device, descriptor, set,
+ binding_layout,
+ pData + entry->offset,
+ entry->array_element, /* offset */
+ entry->array_count); /* size */
+ break;
+ }
+
default:
unreachable("Unsupported descriptor type");
}
.EXT_4444_formats = true,
.EXT_color_write_enable = true,
.EXT_custom_border_color = true,
+ .EXT_inline_uniform_block = true,
.EXT_external_memory_dma_buf = true,
.EXT_host_query_reset = true,
.EXT_image_drm_format_modifier = true,
if (result != VK_SUCCESS)
goto fail;
- device->compiler = v3d_compiler_init(&device->devinfo);
+ device->compiler = v3d_compiler_init(&device->devinfo,
+ MAX_INLINE_UNIFORM_BUFFERS);
device->next_program_id = 0;
ASSERTED int len =
{
v3dv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
+ VkPhysicalDeviceVulkan13Features vk13 = {
+ .inlineUniformBlock = true,
+ /* Inline buffers work like push constants, so after they are bound
+ * some of their contents may be copied into the uniform stream as soon
+ * as the next draw/dispatch is recorded in the command buffer. This means
+ * that if the client updates the buffer contents after binding it to
+ * a command buffer, the next queue submit of that command buffer may
+ * not use the latest update to the buffer contents, but the data that
+ * was present in the buffer at the time it was bound to the command
+ * buffer.
+ */
+ .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
+ };
+
VkPhysicalDeviceVulkan12Features vk12 = {
.hostQueryReset = true,
.uniformAndStorageBuffer8BitAccess = true,
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
+ (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
+ features->inlineUniformBlock = vk13.inlineUniformBlock;
+ features->descriptorBindingInlineUniformBlockUpdateAfterBind =
+ vk13.descriptorBindingInlineUniformBlockUpdateAfterBind;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
VkPhysicalDeviceColorWriteEnableFeaturesEXT *features = (void *) ext;
features->colorWriteEnable = true;
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = mem_size / page_size,
.maxSamplerAllocationCount = 64 * 1024,
- .bufferImageGranularity = 256, /* A cache line */
+ .bufferImageGranularity = V3D_NON_COHERENT_ATOM_SIZE,
.sparseAddressSpaceSize = 0,
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = V3D_MAX_TEXTURE_SAMPLERS,
.standardSampleLocations = false,
.optimalBufferCopyOffsetAlignment = 32,
.optimalBufferCopyRowPitchAlignment = 32,
- .nonCoherentAtomSize = 256,
+ .nonCoherentAtomSize = V3D_NON_COHERENT_ATOM_SIZE,
};
*pProperties = (VkPhysicalDeviceProperties) {
};
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockProperties *props =
+ (VkPhysicalDeviceInlineUniformBlockProperties *)ext;
+ props->maxInlineUniformBlockSize = 4096;
+ props->maxPerStageDescriptorInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BUFFERS;
+ props->maxDescriptorSetInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BUFFERS;
+ props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
+ props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
+ break;
+ }
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
(VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
buffer->size = pCreateInfo->size;
buffer->usage = pCreateInfo->usage;
- buffer->alignment = 256; /* nonCoherentAtomSize */
+ buffer->alignment = V3D_NON_COHERENT_ATOM_SIZE;
/* Limit allocations to 32-bit */
const VkDeviceSize aligned_size = align64(buffer->size, buffer->alignment);
#define MAX_INPUT_ATTACHMENTS 4
#define MAX_UNIFORM_BUFFERS 12
+#define MAX_INLINE_UNIFORM_BUFFERS 4
#define MAX_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_UNIFORM_BUFFERS 8
int binding,
int array_index,
int array_size,
+ int start_index,
uint8_t return_size)
{
assert(array_index < array_size);
assert(return_size == 16 || return_size == 32);
- unsigned index = 0;
- for (unsigned i = 0; i < map->num_desc; i++) {
- if (set == map->set[i] &&
- binding == map->binding[i] &&
- array_index == map->array_index[i]) {
- assert(array_size == map->array_size[i]);
+ unsigned index = start_index;
+ for (; index < map->num_desc; index++) {
+ if (map->used[index] &&
+ set == map->set[index] &&
+ binding == map->binding[index] &&
+ array_index == map->array_index[index]) {
+ assert(array_size == map->array_size[index]);
if (return_size != map->return_size[index]) {
/* If the return_size is different it means that the same sampler
* was used for operations with different precision
map->return_size[index] = 32;
}
return index;
+ } else if (!map->used[index]) {
+ break;
}
- index++;
}
- assert(index == map->num_desc);
+ assert(index < DESCRIPTOR_MAP_SIZE);
+ assert(!map->used[index]);
- map->set[map->num_desc] = set;
- map->binding[map->num_desc] = binding;
- map->array_index[map->num_desc] = array_index;
- map->array_size[map->num_desc] = array_size;
- map->return_size[map->num_desc] = return_size;
- map->num_desc++;
+ map->used[index] = true;
+ map->set[index] = set;
+ map->binding[index] = binding;
+ map->array_index[index] = array_index;
+ map->array_size[index] = array_size;
+ map->return_size[index] = return_size;
+ map->num_desc = MAX2(map->num_desc, index + 1);
return index;
}
&pipeline->shared_data->maps[broadcom_stage]->sampler_map :
&pipeline->shared_data->maps[broadcom_stage]->texture_map;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
default:
unreachable("Descriptor type unknown or not having a descriptor map");
struct v3dv_descriptor_set_binding_layout *binding_layout =
&set_layout->binding[binding];
unsigned index = 0;
- const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
- switch (desc_type) {
+ switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
struct v3dv_descriptor_map *descriptor_map =
- pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
+ pipeline_get_descriptor_map(pipeline, binding_layout->type,
+ shader->info.stage, false);
if (!const_val)
unreachable("non-constant vulkan_resource_index array index");
+ /* At compile-time we will need to know if we are processing a UBO load
+ * for an inline or a regular UBO so we can handle inline loads like
+ * push constants. At the NIR level, however, the inline
+ * information is gone, so we rely on the index to make this distinction.
+ * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
+ * inline buffers. This means that at the descriptor map level
+ * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
+ * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
+ */
+ uint32_t start_index = 0;
+ if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+ binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
+ start_index = MAX_INLINE_UNIFORM_BUFFERS;
+ }
+
index = descriptor_map_add(descriptor_map, set, binding,
const_val->u32,
binding_layout->array_size,
+ start_index,
32 /* return_size: doesn't really apply for this case */);
- if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
- /* skip index 0 which is used for push constants */
+ /* We always reserve index 0 for push constants */
+ if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+ binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
index++;
}
+
break;
}
default:
- unreachable("unsupported desc_type for vulkan_resource_index");
+ unreachable("unsupported descriptor type for vulkan_resource_index");
break;
}
deref->var->data.binding,
array_index,
binding_layout->array_size,
+ 0,
return_size);
if (is_sampler)
deref->var->data.binding,
array_index,
binding_layout->array_size,
+ 0,
32 /* return_size: doesn't apply for textures */);
/* Note: we don't need to do anything here in relation to the precision and
*/
UNUSED unsigned index =
descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
- -1, -1, -1, 0, 16);
+ -1, -1, -1, 0, 0, 16);
assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
index =
descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
- -2, -2, -2, 0, 32);
+ -2, -2, -2, 0, 0, 32);
assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
struct {
struct v3dv_buffer *buffer;
- uint32_t offset;
- uint32_t range;
+ size_t offset;
+ size_t range;
};
struct v3dv_buffer_view *buffer_view;
* FIXME: one alternative would be to allocate the map as big as you need for
* each descriptor type. That would means more individual allocations.
*/
-#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
- MAX_UNIFORM_BUFFERS, \
+#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
+ MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
MAX_STORAGE_BUFFERS)
int binding[DESCRIPTOR_MAP_SIZE];
int array_index[DESCRIPTOR_MAP_SIZE];
int array_size[DESCRIPTOR_MAP_SIZE];
+ bool used[DESCRIPTOR_MAP_SIZE];
/* NOTE: the following is only for sampler, but this is the easier place to
* put it.
uint32_t index,
uint32_t *dynamic_offset);
+struct v3dv_cl_reloc
+v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
+ struct v3dv_descriptor_map *map,
+ struct v3dv_pipeline_layout *pipeline_layout,
+ uint32_t index,
+ VkDescriptorType *out_type);
+
const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_bo *states[MAX_TOTAL_STATES];
};
-#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES)
+#define MAX_TOTAL_UNIFORM_BUFFERS (1 + (MAX_UNIFORM_BUFFERS + \
+ MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
uint32_t dynamic_offset = 0;
- /* For ubos, index is shifted, as 0 is reserved for push constants.
+ /* For ubos, index is shifted, as 0 is reserved for push constants
+ * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
+ * buffers.
*/
- if (content == QUNIFORM_UBO_ADDR &&
- v3d_unit_data_get_unit(data) == 0) {
+ uint32_t index = v3d_unit_data_get_unit(data);
+ if (content == QUNIFORM_UBO_ADDR && index == 0) {
/* This call is to ensure that the push_constant_ubo is
* updated. It already takes into account whether it should
* do the update or not
offset + dynamic_offset);
buffer_bos->ubo[0] = resource->bo;
} else {
- uint32_t index =
- content == QUNIFORM_UBO_ADDR ?
- v3d_unit_data_get_unit(data) - 1 :
- data;
+ if (content == QUNIFORM_UBO_ADDR) {
+ /* We reserve index 0 for push constants and artificially increase our
+ * indices by one for that reason, fix that now before accessing the
+ * descriptor map.
+ */
+ assert(index > 0);
+ index--;
+ } else {
+ index = data;
+ }
struct v3dv_descriptor *descriptor =
v3dv_descriptor_map_get_descriptor(descriptor_state, map,
pipeline->layout,
index, &dynamic_offset);
+
+ /* Inline UBO descriptors store UBO data in descriptor pool memory,
+ * instead of an external buffer.
+ */
assert(descriptor);
- assert(descriptor->buffer);
- assert(descriptor->buffer->mem);
- assert(descriptor->buffer->mem->bo);
if (content == QUNIFORM_GET_SSBO_SIZE ||
content == QUNIFORM_GET_UBO_SIZE) {
cl_aligned_u32(uniforms, descriptor->range);
} else {
- cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset +
- descriptor->buffer->mem_offset +
- descriptor->offset +
- offset + dynamic_offset);
+ /* Inline uniform buffers store their contents in pool memory instead
+ * of an external buffer.
+ */
+ struct v3dv_bo *bo;
+ uint32_t addr;
+ if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ assert(dynamic_offset == 0);
+ struct v3dv_cl_reloc reloc =
+ v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
+ descriptor_state, map,
+ pipeline->layout, index,
+ NULL);
+ bo = reloc.bo;
+ addr = reloc.bo->offset + reloc.offset + offset;
+ } else {
+ assert(descriptor->buffer);
+ assert(descriptor->buffer->mem);
+ assert(descriptor->buffer->mem->bo);
+
+ bo = descriptor->buffer->mem->bo;
+ addr = bo->offset +
+ descriptor->buffer->mem_offset +
+ descriptor->offset +
+ offset + dynamic_offset;
+ }
+
+ cl_aligned_u32(uniforms, addr);
if (content == QUNIFORM_UBO_ADDR) {
- assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS);
- buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo;
+ assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
+ buffer_bos->ubo[index] = bo;
} else {
assert(index < MAX_TOTAL_STORAGE_BUFFERS);
- buffer_bos->ssbo[index] = descriptor->buffer->mem->bo;
+ buffer_bos->ssbo[index] = bo;
}
}
}
}
+/* Emits a single 32-bit value from an inline uniform buffer into the
+ * uniform stream. 'index' selects the inline buffer (0-based, so it maps
+ * directly into the first MAX_INLINE_UNIFORM_BUFFERS slots of the UBO
+ * descriptor map) and 'offset' is in 32-bit units.
+ */
+static void
+write_inline_uniform(struct v3dv_cl_out **uniforms,
+ uint32_t index,
+ uint32_t offset,
+ struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_pipeline *pipeline,
+ enum broadcom_shader_stage stage)
+{
+ assert(index < MAX_INLINE_UNIFORM_BUFFERS);
+
+ struct v3dv_descriptor_state *descriptor_state =
+ v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
+
+ struct v3dv_descriptor_map *map =
+ &pipeline->shared_data->maps[stage]->ubo_map;
+
+ struct v3dv_cl_reloc reloc =
+ v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
+ descriptor_state, map,
+ pipeline->layout, index,
+ NULL);
+
+ /* Offset comes in 32-bit units */
+ uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
+ cl_aligned_u32(uniforms, *addr);
+}
+
static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
enum quniform_contents contents,
cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
break;
+ case QUNIFORM_INLINE_UBO_0:
+ case QUNIFORM_INLINE_UBO_1:
+ case QUNIFORM_INLINE_UBO_2:
+ case QUNIFORM_INLINE_UBO_3:
+ write_inline_uniform(&uniforms,
+ uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
+ cmd_buffer, pipeline, variant->stage);
+ break;
+
case QUNIFORM_VIEWPORT_X_SCALE:
cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
break;
return screen->devinfo.ver >= 40;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return 256;
+ return V3D_NON_COHERENT_ATOM_SIZE;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
if (screen->devinfo.ver < 40)
v3d_resource_screen_init(pscreen);
- screen->compiler = v3d_compiler_init(&screen->devinfo);
+ screen->compiler = v3d_compiler_init(&screen->devinfo, 0);
#ifdef ENABLE_SHADER_CACHE
v3d_disk_cache_init(screen);