PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT
};
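+/* Types of resources whose device address the driver resolves and
+ * supplies at command buffer recording time.
+ */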
+enum pvr_pds_addr_literal_type {
+ PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE,
+};
+
/*****************************************************************************
Structure definitions
*****************************************************************************/
*/
};
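+/* An address literal that the PDS descriptor program DMAs into the shared
+ * registers. 'destination' is the first of the two shared regs that will
+ * hold the 64-bit device address.
+ */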
+struct pvr_pds_addr_literal {
+ enum pvr_pds_addr_literal_type type;
+ unsigned int destination;
+};
+
#define PVR_BUFFER_TYPE_UBO (0)
#define PVR_BUFFER_TYPE_COMPILE_TIME (1)
#define PVR_BUFFER_TYPE_BLEND_CONSTS (2)
unsigned int descriptor_set_count;
struct pvr_pds_descriptor_set descriptor_sets[8];
+ unsigned int addr_literal_count;
+ struct pvr_pds_addr_literal addr_literals[8];
+
/* "State" buffers, including:
* compile-time constants
* blend constants
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL_BUFFER (15)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL (16)
+
/* We pack all the following structs tightly into a buffer using += sizeof(x)
* offsets, this can lead to data that is not native aligned. Supplying the
* packed attribute indicates that unaligned accesses may be required, and the
uint32_t cond_render_pred_temp;
} PVR_ALIGNED;
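+/* The driver allocates a buffer of 'size' bytes to hold the address
+ * literals and writes its device address into the PDS data section at
+ * 'const_offset'.
+ */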
+struct pvr_pds_const_map_entry_addr_literal_buffer {
+ uint8_t type;
+ uint8_t const_offset;
+
+ uint32_t size;
+} PVR_ALIGNED;
+
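+/* One entry per address literal. Each entry contributes one 64-bit
+ * address, resolved from 'addr_type' at recording time, to the buffer
+ * described by the preceding ADDR_LITERAL_BUFFER entry.
+ */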
+struct pvr_pds_const_map_entry_addr_literal {
+ uint8_t type;
+ enum pvr_pds_addr_literal_type addr_type;
+} PVR_ALIGNED;
+
struct pvr_pds_info {
uint32_t temps_required;
uint32_t code_size_in_dwords;
num_consts64 = input_program->descriptor_set_count;
total_dma_count = input_program->descriptor_set_count;
+ /* One extra DOUTD for the buffer containing the address literals. */
+ if (input_program->addr_literal_count > 0) {
+ num_consts32++;
+ num_consts64++;
+ total_dma_count++;
+ }
+
pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
for (unsigned int index = 0; index < input_program->buffer_count; index++) {
next_const64 = 0;
next_const32 = num_consts64 * 2;
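+ /* Emit the DOUTD that DMAs the address literal buffer into the shared
+ * registers, along with a const map entry for the buffer itself followed
+ * by one entry per literal so the driver knows what to upload.
+ */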
+ if (input_program->addr_literal_count > 0) {
+ bool last_dma = (++running_dma_count == total_dma_count);
+ bool halt = last_dma && !input_program->secondary_program_present;
+
+ unsigned int size_in_dwords = input_program->addr_literal_count *
+ sizeof(uint64_t) / sizeof(uint32_t);
+ unsigned int destination = input_program->addr_literals[0].destination;
+
+ struct pvr_pds_const_map_entry_addr_literal_buffer
+ *addr_literal_buffer_entry;
+
+ addr_literal_buffer_entry = pvr_prepare_next_pds_const_map_entry(
+ &entry_write_state,
+ sizeof(*addr_literal_buffer_entry));
+
+ addr_literal_buffer_entry->type =
+ PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL_BUFFER;
+ addr_literal_buffer_entry->size = size_in_dwords * sizeof(uint32_t);
+ addr_literal_buffer_entry->const_offset = next_const64 * 2;
+
+ for (unsigned int i = 0; i < input_program->addr_literal_count; i++) {
+ struct pvr_pds_const_map_entry_addr_literal *addr_literal_entry;
+
+ /* Check that the destinations for the addr literals are contiguous.
+ * Non-contiguous ranges are not supported, as they would require either
+ * a single large buffer with wasted memory for the DMA, or multiple
+ * buffers to DMA.
+ */
+ if (i > 0) {
+ const uint32_t current_addr_literal_destination =
+ input_program->addr_literals[i].destination;
+ const uint32_t previous_addr_literal_destination =
+ input_program->addr_literals[i - 1].destination;
+
+ /* Two regs are needed to store a 64-bit address. */
+ assert(current_addr_literal_destination ==
+ previous_addr_literal_destination + 2);
+ }
+
+ addr_literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*addr_literal_entry));
+
+ addr_literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL;
+ addr_literal_entry->addr_type = input_program->addr_literals[i].type;
+ }
+
+ PVR_PDS_MODE_TOGGLE(code_section,
+ instruction,
+ pvr_encode_burst_cs(&entry_write_state,
+ last_dma,
+ halt,
+ next_const32,
+ next_const64,
+ size_in_dwords,
+ destination));
+
+ next_const64++;
+ next_const32++;
+ }
+
/* For each descriptor set perform a DOUTD. */
for (unsigned int descriptor_index = 0;
descriptor_index < input_program->descriptor_set_count;
}
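+/* Uploads a table containing one entry per descriptor set slot, each
+ * holding the set's device address (or an invalid address if unbound).
+ * This table's address is what PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE
+ * resolves to.
+ */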
static VkResult
-pvr_setup_descriptor_mappings_new(uint32_t *const descriptor_data_offset_out)
+pvr_cmd_buffer_upload_desc_set_table(struct pvr_cmd_buffer *const cmd_buffer,
+ enum pvr_stage_allocation stage,
+ pvr_dev_addr_t *addr_out)
{
- *descriptor_data_offset_out = ~0;
+ uint64_t bound_desc_sets[PVR_MAX_DESCRIPTOR_SETS];
+ const struct pvr_descriptor_state *desc_state;
+ struct pvr_bo *bo;
+ VkResult result;
+
+ switch (stage) {
+ case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+ case PVR_STAGE_ALLOCATION_FRAGMENT:
+ desc_state = &cmd_buffer->state.gfx_desc_state;
+ break;
+
+ case PVR_STAGE_ALLOCATION_COMPUTE:
+ desc_state = &cmd_buffer->state.compute_desc_state;
+ break;
- pvr_finishme("Implement new desc set path.");
+ default:
+ unreachable("Unsupported stage.");
+ break;
+ }
- return VK_ERROR_UNKNOWN;
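+
+ /* One 64-bit device address per descriptor set slot; unbound slots get
+ * PVR_DEV_ADDR_INVALID.
+ */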
+ for (uint32_t set = 0; set < ARRAY_SIZE(bound_desc_sets); set++) {
+ if (!(desc_state->valid_mask & BITFIELD_BIT(set))) {
+ bound_desc_sets[set] = PVR_DEV_ADDR_INVALID.addr;
+ } else {
+ bound_desc_sets[set] =
+ desc_state->descriptor_sets[set]->pvr_bo->vma->dev_addr.addr;
+ }
+ }
+
+ result = pvr_cmd_buffer_upload_general(cmd_buffer,
+ bound_desc_sets,
+ sizeof(bound_desc_sets),
+ &bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ *addr_out = bo->vma->dev_addr;
+ return VK_SUCCESS;
+}
+
+static VkResult
+pvr_process_addr_literal(struct pvr_cmd_buffer *cmd_buffer,
+ enum pvr_pds_addr_literal_type addr_literal_type,
+ enum pvr_stage_allocation stage,
+ pvr_dev_addr_t *addr_out)
+{
+ VkResult result;
+
+ switch (addr_literal_type) {
+ case PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE: {
+ /* TODO: Maybe we should free the pvr_bo here, and only link all the
+ * BOs to the command buffer once the data section has been written
+ * successfully.
+ */
+ result =
+ pvr_cmd_buffer_upload_desc_set_table(cmd_buffer, stage, addr_out);
+ if (result != VK_SUCCESS)
+ return result;
+
+ break;
+ }
+
+ default:
+ unreachable("Invalid add literal type.");
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult pvr_setup_descriptor_mappings_new(
+ struct pvr_cmd_buffer *const cmd_buffer,
+ enum pvr_stage_allocation stage,
+ const struct pvr_stage_allocation_descriptor_state *descriptor_state,
+ uint32_t *const descriptor_data_offset_out)
+{
+ const struct pvr_pds_info *const pds_info = &descriptor_state->pds_info;
+ const uint8_t *entries;
+ uint32_t *dword_buffer;
+ uint64_t *qword_buffer;
+ struct pvr_bo *pvr_bo;
+ VkResult result;
+
+ if (!pds_info->data_size_in_dwords)
+ return VK_SUCCESS;
+
+ result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+ cmd_buffer->device->heaps.pds_heap,
+ pds_info->data_size_in_dwords << 2,
+ PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+ &pvr_bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ dword_buffer = (uint32_t *)pvr_bo->bo->map;
+ qword_buffer = (uint64_t *)pvr_bo->bo->map;
+
+ entries = (uint8_t *)pds_info->entries;
+
+ switch (stage) {
+ case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+ case PVR_STAGE_ALLOCATION_FRAGMENT:
+ case PVR_STAGE_ALLOCATION_COMPUTE:
+ break;
+
+ default:
+ unreachable("Unsupported stage.");
+ break;
+ }
+
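+ /* Walk the const map entries emitted when the PDS program was generated
+ * and patch the data section accordingly.
+ */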
+ for (uint32_t i = 0; i < pds_info->entry_count; i++) {
+ const struct pvr_const_map_entry *const entry_header =
+ (struct pvr_const_map_entry *)entries;
+
+ switch (entry_header->type) {
+ case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
+ const struct pvr_const_map_entry_literal32 *const literal =
+ (struct pvr_const_map_entry_literal32 *)entries;
+
+ PVR_WRITE(dword_buffer,
+ literal->literal_value,
+ literal->const_offset,
+ pds_info->data_size_in_dwords);
+
+ entries += sizeof(*literal);
+ break;
+ }
+
+ case PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL_BUFFER: {
+ const struct pvr_pds_const_map_entry_addr_literal_buffer
+ *const addr_literal_buffer_entry =
+ (struct pvr_pds_const_map_entry_addr_literal_buffer *)entries;
+ struct pvr_device *device = cmd_buffer->device;
+ struct pvr_bo *addr_literal_buffer_bo;
+ uint32_t addr_literal_count = 0;
+ uint64_t *addr_literal_buffer;
+
+ result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+ device->heaps.general_heap,
+ addr_literal_buffer_entry->size,
+ PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+ &addr_literal_buffer_bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ addr_literal_buffer = (uint64_t *)addr_literal_buffer_bo->bo->map;
+
+ entries += sizeof(*addr_literal_buffer_entry);
+
+ PVR_WRITE(qword_buffer,
+ addr_literal_buffer_bo->vma->dev_addr.addr,
+ addr_literal_buffer_entry->const_offset,
+ pds_info->data_size_in_dwords);
+
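+ /* Consume the run of ADDR_LITERAL entries that follows and resolve each
+ * one to a device address now, at recording time.
+ */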
+ for (uint32_t j = i + 1; j < pds_info->entry_count; j++) {
+ const struct pvr_const_map_entry *const entry_header =
+ (struct pvr_const_map_entry *)entries;
+ const struct pvr_pds_const_map_entry_addr_literal *addr_literal;
+ pvr_dev_addr_t dev_addr;
+
+ if (entry_header->type != PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL)
+ break;
+
+ addr_literal =
+ (struct pvr_pds_const_map_entry_addr_literal *)entries;
+
+ result = pvr_process_addr_literal(cmd_buffer,
+ addr_literal->addr_type,
+ stage,
+ &dev_addr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ addr_literal_buffer[addr_literal_count++] = dev_addr.addr;
+
+ entries += sizeof(*addr_literal);
+ }
+
+ assert(addr_literal_count * sizeof(uint64_t) ==
+ addr_literal_buffer_entry->size);
+
+ i += addr_literal_count;
+
+ pvr_bo_cpu_unmap(device, addr_literal_buffer_bo);
+ break;
+ }
+
+ default:
+ unreachable("Unsupported map entry type.");
+ }
+ }
+
+ pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
+
+ *descriptor_data_offset_out =
+ pvr_bo->vma->dev_addr.addr -
+ cmd_buffer->device->heaps.pds_heap->base_addr.addr;
+
+ return VK_SUCCESS;
}
static VkResult pvr_setup_descriptor_mappings(
descriptor_data_offset_out);
}
- return pvr_setup_descriptor_mappings_new(descriptor_data_offset_out);
+ return pvr_setup_descriptor_mappings_new(cmd_buffer,
+ stage,
+ descriptor_state,
+ descriptor_data_offset_out);
}
static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
* (pvr_const_map_entry_literal32)
*
* 3. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+ *
+ * 4. Size of the address literal buffer entry
+ *    (pvr_pds_const_map_entry_addr_literal_buffer) +
+ *    max. number of PDS address literals (8) *
+ *    size of entry (pvr_pds_const_map_entry_addr_literal)
*/
/* FIXME: PVR_MAX_DESCRIPTOR_SETS is 4 and not 8. The comment above seems to
PVR_PDS_MAX_BUFFERS *
(sizeof(struct pvr_const_map_entry_constant_buffer) +
sizeof(struct pvr_const_map_entry_literal32)) +
- sizeof(struct pvr_const_map_entry_doutu_address));
+ sizeof(struct pvr_const_map_entry_doutu_address) +
+ sizeof(struct pvr_pds_const_map_entry_addr_literal_buffer) +
+ 8 * sizeof(struct pvr_pds_const_map_entry_addr_literal));
}
/* This is a const pointer to an array of PVR_PDS_MAX_BUFFERS pvr_pds_buffer
return VK_SUCCESS;
}
+/**
+ * \brief Indicates the layout of shared registers allocated by the driver.
+ *
+ * 'present' fields indicate whether space was allocated for a given
+ * resource, i.e. whether it will be present in the shareds.
+ * 'offset' fields indicate the shared reg at which the resource starts.
+ */
+struct pvr_sh_reg_layout {
+ /* If present, this always takes up 2 sh regs and contains the device
+ * address of the descriptor set addrs table.
+ */
+ struct {
+ bool present;
+ uint32_t offset;
+ } descriptor_set_addrs_table;
+};
+
static VkResult pvr_pds_descriptor_program_create_and_upload(
struct pvr_device *const device,
const VkAllocationCallbacks *const allocator,
const struct pvr_explicit_constant_usage *const explicit_const_usage,
const struct pvr_pipeline_layout *const layout,
enum pvr_stage_allocation stage,
+ const struct pvr_sh_reg_layout *sh_reg_layout,
struct pvr_stage_allocation_descriptor_state *const descriptor_state)
{
const size_t const_entries_size_in_bytes =
};
}
} else {
- pvr_finishme("Implement new desc set path.");
- return VK_ERROR_UNKNOWN;
+ uint32_t addr_literals = 0;
+
+ if (sh_reg_layout->descriptor_set_addrs_table.present) {
+ program.addr_literals[addr_literals] = (struct pvr_pds_addr_literal){
+ .type = PVR_PDS_ADDR_LITERAL_DESC_SET_ADDRS_TABLE,
+ .destination = sh_reg_layout->descriptor_set_addrs_table.offset,
+ };
+ addr_literals++;
+ }
+
+ /* TODO: Add support for other allocation types, e.g. blend constants
+ * and push constants.
+ */
+
+ program.addr_literal_count = addr_literals;
}
entries_buffer = vk_alloc2(&device->vk.alloc,
vk_object_base_finish(&pipeline->base);
}
+/* How many shared regs it takes to store a pvr_dev_addr_t.
+ * Each shared reg is 32 bits.
+ */
+#define PVR_DEV_ADDR_SIZE_IN_SH_REGS \
+ DIV_ROUND_UP(sizeof(pvr_dev_addr_t), sizeof(uint32_t))
+
+/**
+ * \brief Allocates shared registers.
+ *
+ * \return How many sh regs are required.
+ */
+static uint32_t
+pvr_pipeline_alloc_shareds(const struct pvr_device *device,
+ const struct pvr_pipeline_layout *layout,
+ enum pvr_stage_allocation stage,
+ struct pvr_sh_reg_layout *const sh_reg_layout_out)
+{
+ ASSERTED const uint64_t reserved_shared_size =
+ device->pdevice->dev_runtime_info.reserved_shared_size;
+ ASSERTED const uint64_t max_coeff =
+ device->pdevice->dev_runtime_info.max_coeffs;
+
+ struct pvr_sh_reg_layout reg_layout = { 0 };
+ uint32_t next_free_sh_reg = 0;
+
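+ /* The descriptor set addrs table is only needed if some descriptor set
+ * in the layout is visible to this stage.
+ */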
+ reg_layout.descriptor_set_addrs_table.present =
+ !!(layout->shader_stage_mask & BITFIELD_BIT(stage));
+
+ if (reg_layout.descriptor_set_addrs_table.present) {
+ reg_layout.descriptor_set_addrs_table.offset = next_free_sh_reg;
+ next_free_sh_reg += PVR_DEV_ADDR_SIZE_IN_SH_REGS;
+ }
+
+ /* TODO: Add allocation for blend constants, push constants, and other buffer
+ * types.
+ */
+
+ *sh_reg_layout_out = reg_layout;
+
+ /* FIXME: We might need to take more things into consideration.
+ * See pvr_calc_fscommon_size_and_tiles_in_flight().
+ */
+ assert(next_free_sh_reg <= reserved_shared_size - max_coeff);
+
+ return next_free_sh_reg;
+}
+
+#undef PVR_DEV_ADDR_SIZE_IN_SH_REGS
+
/******************************************************************************
Compute pipeline functions
******************************************************************************/
uint32_t work_group_input_regs[PVR_WORKGROUP_DIMENSIONS];
struct pvr_explicit_constant_usage explicit_const_usage;
uint32_t local_input_regs[PVR_WORKGROUP_DIMENSIONS];
+ struct pvr_sh_reg_layout sh_reg_layout;
struct rogue_ubo_data ubo_data;
uint32_t barrier_coefficient;
uint32_t usc_temps;
explicit_const_usage = build_info.explicit_conts_usage;
} else {
+ uint32_t sh_count;
+
+ sh_count = pvr_pipeline_alloc_shareds(device,
+ compute_pipeline->base.layout,
+ PVR_STAGE_ALLOCATION_COMPUTE,
+ &sh_reg_layout);
+
+ compute_pipeline->shader_state.const_shared_reg_count = sh_count;
+
/* FIXME: Compile and upload the shader. */
/* FIXME: Initialize the shader state and setup build info. */
abort();
&explicit_const_usage,
compute_pipeline->base.layout,
PVR_STAGE_ALLOCATION_COMPUTE,
+ &sh_reg_layout,
&compute_pipeline->descriptor_state);
if (result != VK_SUCCESS)
goto err_free_shader;
struct rogue_build_ctx *ctx;
VkResult result;
+ const bool old_path =
+ pvr_hard_code_shader_required(&device->pdevice->dev_info);
+
+ /* Vars needed for the new path. */
+ /* TODO: These need to be passed into the compiler so that it knows which
+ * shared regs to use to access specific resources.
+ */
+ struct pvr_sh_reg_layout vert_sh_reg_layout;
+ struct pvr_sh_reg_layout frag_sh_reg_layout;
+ uint32_t vert_sh_count = 0;
+ uint32_t frag_sh_count = 0;
+
+ if (!old_path) {
+ vert_sh_count =
+ pvr_pipeline_alloc_shareds(device,
+ gfx_pipeline->base.layout,
+ PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+ &vert_sh_reg_layout);
+
+ frag_sh_count = pvr_pipeline_alloc_shareds(device,
+ gfx_pipeline->base.layout,
+ PVR_STAGE_ALLOCATION_FRAGMENT,
+ &frag_sh_reg_layout);
+ }
+
/* Setup shared build context. */
ctx = rogue_build_context_create(compiler);
if (!ctx)
pvr_vertex_state_init(gfx_pipeline,
&ctx->common_data[MESA_SHADER_VERTEX],
&ctx->stage_data.vs);
+
+ if (!old_path) {
+ struct pvr_vertex_shader_state *vertex_state =
+ &gfx_pipeline->shader_state.vertex;
+
+ /* FIXME: For now we just overwrite it, but the compiler shouldn't be
+ * returning the sh count, since the driver is in charge of allocating
+ * the shareds.
+ */
+ vertex_state->stage_state.const_shared_reg_count = vert_sh_count;
+ }
}
result = pvr_gpu_upload_usc(device,
} else {
pvr_fragment_state_init(gfx_pipeline,
&ctx->common_data[MESA_SHADER_FRAGMENT]);
+
+ if (!old_path) {
+ struct pvr_fragment_shader_state *fragment_state =
+ &gfx_pipeline->shader_state.fragment;
+
+ /* FIXME: For now we just overwrite it, but the compiler shouldn't be
+ * returning the sh count, since the driver is in charge of allocating
+ * the shareds.
+ */
+ fragment_state->stage_state.const_shared_reg_count = frag_sh_count;
+ }
}
result = pvr_gpu_upload_usc(device,
&vert_explicit_const_usage,
gfx_pipeline->base.layout,
PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+ &vert_sh_reg_layout,
&gfx_pipeline->shader_state.vertex.descriptor_state);
if (result != VK_SUCCESS)
goto err_free_vertex_attrib_program;
&frag_explicit_const_usage,
gfx_pipeline->base.layout,
PVR_STAGE_ALLOCATION_FRAGMENT,
+ &frag_sh_reg_layout,
&gfx_pipeline->shader_state.fragment.descriptor_state);
if (result != VK_SUCCESS)
goto err_free_vertex_descriptor_program;