*/
#include "v3dv_private.h"
-#include "broadcom/cle/v3dx_pack.h"
-#include "util/half_float.h"
#include "util/u_pack_color.h"
-#include "vk_format_info.h"
-
-const struct v3dv_dynamic_state default_dynamic_state = {
- .viewport = {
- .count = 0,
- },
- .scissor = {
- .count = 0,
- },
- .stencil_compare_mask =
- {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_write_mask =
- {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_reference =
- {
- .front = 0u,
- .back = 0u,
- },
- .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
- .depth_bias = {
- .constant_factor = 0.0f,
- .slope_factor = 0.0f,
- },
- .line_width = 1.0f,
-};
+#include "vk_util.h"
void
v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
if (!bo)
return;
- if (_mesa_set_search(job->bos, bo))
- return;
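+ /* The BO handle mask acts as a cheap conservative filter: if the BO's
+ * handle bit is not set it cannot be in the set yet, so we can skip the
+ * hash table lookup entirely.
+ */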
+ if (job->bo_handle_mask & bo->handle_bit) {
+ if (_mesa_set_search(job->bos, bo))
+ return;
+ }
_mesa_set_add(job->bos, bo);
job->bo_count++;
+ job->bo_handle_mask |= bo->handle_bit;
}
-static void
-cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer);
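+/* Variant of v3dv_job_add_bo() that skips the duplicate check. Callers
+ * must guarantee the BO is not already tracked by the job.
+ */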
+void
+v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
+{
+ assert(bo);
+ _mesa_set_add(job->bos, bo);
+ job->bo_count++;
+ job->bo_handle_mask |= bo->handle_bit;
+}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateCommandPool(VkDevice _device,
const VkCommandPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
/* We only support one queue */
assert(pCreateInfo->queueFamilyIndex == 0);
- pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
+ VK_OBJECT_TYPE_COMMAND_POOL);
if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (pAllocator)
pool->alloc = *pAllocator;
else
- pool->alloc = device->alloc;
+ pool->alloc = device->vk.alloc;
list_inithead(&pool->cmd_buffers);
struct v3dv_cmd_pool *pool,
VkCommandBufferLevel level)
{
- /* Do not reset the loader data header! If we are calling this from
- * a command buffer reset that would reset the loader's dispatch table for
- * the command buffer.
+ /* Do not reset the base object! If we are calling this from a command
+ * buffer reset, that would reset the loader's dispatch table for the
+ * command buffer, as well as any other relevant info from vk_object_base.
*/
- const uint32_t ld_size = sizeof(VK_LOADER_DATA);
- uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + ld_size;
- memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - ld_size);
+ const uint32_t base_size = sizeof(struct vk_command_buffer);
+ uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size;
+ memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size);
cmd_buffer->device = device;
cmd_buffer->pool = pool;
VkCommandBuffer *pCommandBuffer)
{
struct v3dv_cmd_buffer *cmd_buffer;
- cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ cmd_buffer = vk_zalloc2(&device->vk.alloc,
+ &pool->alloc,
+ sizeof(*cmd_buffer),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- cmd_buffer_init(cmd_buffer, device, pool, level);
+ VkResult result;
+ result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
+ return result;
+ }
- cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+ cmd_buffer_init(cmd_buffer, device, pool, level);
*pCommandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) {
list_del(&bo->list_link);
- vk_free(&job->device->alloc, bo);
+ vk_free(&job->device->vk.alloc, bo);
}
list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) {
list_del(&bo->list_link);
- vk_free(&job->device->alloc, bo);
+ vk_free(&job->device->vk.alloc, bo);
}
list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) {
list_del(&bo->list_link);
- vk_free(&job->device->alloc, bo);
+ vk_free(&job->device->vk.alloc, bo);
}
}
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
assert(job->cmd_buffer);
- vk_free(&job->cmd_buffer->device->alloc, job->cpu.event_wait.events);
+ vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
}
static void
job_destroy_cloned_gpu_cl_resources(job);
}
- vk_free(&job->device->alloc, job);
+ vk_free(&job->device->vk.alloc, job);
}
void
v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb)
{
struct v3dv_cmd_buffer_private_obj *pobj =
- vk_alloc(&cmd_buffer->device->alloc, sizeof(*pobj), 8,
+ vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*pobj), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!pobj) {
v3dv_flag_oom(cmd_buffer, NULL);
assert(pobj && pobj->obj && pobj->destroy_cb);
pobj->destroy_cb(v3dv_device_to_handle(cmd_buffer->device),
pobj->obj,
- &cmd_buffer->device->alloc);
+ &cmd_buffer->device->vk.alloc);
list_del(&pobj->list_link);
- vk_free(&cmd_buffer->device->alloc, pobj);
+ vk_free(&cmd_buffer->device->vk.alloc, pobj);
}
static void
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
if (cmd_buffer->state.query.end.alloc_count > 0)
- vk_free(&cmd_buffer->device->alloc, cmd_buffer->state.query.end.states);
+ vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.query.end.states);
if (cmd_buffer->push_constants_resource.bo)
v3dv_bo_free(cmd_buffer->device, cmd_buffer->push_constants_resource.bo);
cmd_buffer_destroy_private_obj(cmd_buffer, pobj);
}
- if (cmd_buffer->meta.blit.dspool) {
- v3dv_DestroyDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
- cmd_buffer->meta.blit.dspool,
- &cmd_buffer->device->alloc);
- }
-
if (cmd_buffer->state.meta.attachments) {
assert(cmd_buffer->state.meta.attachment_alloc_count > 0);
- vk_free(&cmd_buffer->device->alloc, cmd_buffer->state.meta.attachments);
+ vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.meta.attachments);
}
}
{
list_del(&cmd_buffer->pool_link);
cmd_buffer_free_resources(cmd_buffer);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
-}
-
-void
-v3dv_job_emit_binning_flush(struct v3dv_job *job)
-{
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, FLUSH, flush);
+ vk_command_buffer_finish(&cmd_buffer->vk);
+ vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc,
+ cmd_buffer);
}
static bool
struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx];
struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx];
+ /* Don't merge if the subpasses have different view masks, since in that
+ * case the framebuffer setup is different and we need to emit different
+ * RCLs.
+ */
+ if (subpass->view_mask != prev_subpass->view_mask)
+ return false;
+
/* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
* we need to check that for each subpass all its used attachments are
* used by the other subpass.
if (!compatible)
return false;
- /* FIXME: resolve attachments */
-
if (subpass->ds_attachment.attachment !=
prev_subpass->ds_attachment.attachment)
return false;
+ /* FIXME: Since some attachment formats can't be resolved using the TLB we
+ * need to emit separate resolve jobs for them, which would not be
+ * compatible with subpass merges. We could fix that by testing whether
+ * any of the attachments to resolve doesn't support TLB resolves.
+ */
+ if (prev_subpass->resolve_attachments || subpass->resolve_attachments)
+ return false;
+
return true;
}
uint32_t height,
uint32_t layers,
uint32_t render_target_count,
- uint8_t max_internal_bpp)
+ uint8_t max_internal_bpp,
+ bool msaa)
{
- static const uint8_t tile_sizes[] = {
- 64, 64,
- 64, 32,
- 32, 32,
- 32, 16,
- 16, 16,
- };
-
assert(job);
struct v3dv_frame_tiling *tiling = &job->frame_tiling;
tiling->height = height;
tiling->layers = layers;
tiling->render_target_count = render_target_count;
+ tiling->msaa = msaa;
+ tiling->internal_bpp = max_internal_bpp;
- uint32_t tile_size_index = 0;
-
- /* FIXME: MSAA */
-
- if (render_target_count > 2)
- tile_size_index += 2;
- else if (render_target_count > 1)
- tile_size_index += 1;
+ /* We can use double-buffer when MSAA is disabled to reduce tile store
+ * overhead.
+ *
+ * FIXME: if we are emitting any tile loads the hardware will serialize
+ * loads and stores across tiles, effectively disabling double buffering,
+ * so we would want to check for that and not enable it in that case to
+ * avoid reducing the tile size.
+ */
+ tiling->double_buffer =
+ unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
- tiling->internal_bpp = max_internal_bpp;
- tile_size_index += tiling->internal_bpp;
- assert(tile_size_index < ARRAY_SIZE(tile_sizes));
+ assert(!tiling->msaa || !tiling->double_buffer);
- tiling->tile_width = tile_sizes[tile_size_index * 2];
- tiling->tile_height = tile_sizes[tile_size_index * 2 + 1];
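+ /* Common helper that picks the largest tile geometry that fits in the
+ * tile buffer for this combination of render target count, bpp, MSAA
+ * and double-buffering.
+ */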
+ v3d_choose_tile_size(render_target_count, max_internal_bpp,
+ tiling->msaa, tiling->double_buffer,
+ &tiling->tile_width, &tiling->tile_height);
tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);
uint32_t width,
uint32_t height,
uint32_t layers,
+ bool allocate_tile_state_for_all_layers,
uint32_t render_target_count,
- uint8_t max_internal_bpp)
+ uint8_t max_internal_bpp,
+ bool msaa)
{
assert(job);
const struct v3dv_frame_tiling *tiling =
job_compute_frame_tiling(job,
width, height, layers,
- render_target_count, max_internal_bpp);
+ render_target_count, max_internal_bpp, msaa);
v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
v3dv_return_if_oom(NULL, job);
+ /* We only need to allocate tile state for all layers if the binner
+ * writes primitives to layers other than the first. This can only be
+ * done using layered rendering (writing gl_Layer from a geometry shader),
+ * so for other cases of multilayered framebuffers (typically with
+ * meta copy/clear operations) that won't use layered rendering, we only
+ * need one layer worth of tile state for the binner.
+ */
+ if (!allocate_tile_state_for_all_layers)
+ layers = 1;
+
/* The PTB will request the tile alloc initial size per tile at start
* of tile binning.
*/
return;
}
- v3dv_job_add_bo(job, job->tile_alloc);
+ v3dv_job_add_bo_unchecked(job, job->tile_alloc);
const uint32_t tsda_per_tile_size = 256;
const uint32_t tile_state_size = tiling->layers *
return;
}
- v3dv_job_add_bo(job, job->tile_state);
-
- /* This must go before the binning mode configuration. It is
- * required for layered framebuffers to work.
- */
- cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
- config.number_of_layers = layers;
- }
-
- cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
- config.width_in_pixels = tiling->width;
- config.height_in_pixels = tiling->height;
- config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
- config.multisample_mode_4x = false; /* FIXME */
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- }
-
- /* There's definitely nothing in the VCD cache we want. */
- cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+ v3dv_job_add_bo_unchecked(job, job->tile_state);
- /* "Binning mode lists must have a Start Tile Binning item (6) after
- * any prefix state data before the binning list proper starts."
- */
- cl_emit(&job->bcl, START_TILE_BINNING, bin);
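+ /* v3dv_X() dispatches to the per-hardware-version implementation, which
+ * emits the packets that start the binning list.
+ */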
+ v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);
- job->ez_state = VC5_EZ_UNDECIDED;
- job->first_ez_state = VC5_EZ_UNDECIDED;
+ job->ez_state = V3D_EZ_UNDECIDED;
+ job->first_ez_state = V3D_EZ_UNDECIDED;
}
static void
* any RCL commands of its own.
*/
if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
- cmd_buffer_emit_render_pass_rcl(cmd_buffer);
-
- v3dv_job_emit_binning_flush(cmd_buffer->state.job);
-}
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer);
-static void
-cmd_buffer_end_render_pass_secondary(struct v3dv_cmd_buffer *cmd_buffer)
-{
- assert(cmd_buffer->state.job);
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl,
- cl_packet_length(RETURN_FROM_SUB_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
- cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret);
+ v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
}
struct v3dv_job *
struct v3dv_cmd_buffer *cmd_buffer,
uint32_t subpass_idx)
{
- struct v3dv_job *job = vk_zalloc(&device->alloc,
+ struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job) {
cmd_buffer_end_render_pass_frame(cmd_buffer);
} else {
assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
- cmd_buffer_end_render_pass_secondary(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer);
}
}
v3dv_cl_init(job, &job->indirect);
- if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH))
job->always_flush = true;
}
* bits.
*/
cmd_buffer->state.dirty = ~0;
+ cmd_buffer->state.dirty_descriptor_stages = ~0;
+
+ /* Honor inheritance of occlusion queries in secondaries if requested */
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
+ cmd_buffer->state.inheritance.occlusion_query_enable) {
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+ }
/* Keep track of the first subpass that we are recording in this new job.
* We will use this when we emit the RCL to decide how to emit our loads
v3dv_cmd_buffer_finish_job(cmd_buffer);
assert(cmd_buffer->state.job == NULL);
- struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->alloc,
+ struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
VkCommandBufferResetFlags flags)
{
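+ /* Reset the state of the shared vk_command_buffer base object before
+ * resetting any driver-specific state.
+ */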
+ vk_command_buffer_reset(&cmd_buffer->vk);
if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
struct v3dv_device *device = cmd_buffer->device;
struct v3dv_cmd_pool *pool = cmd_buffer->pool;
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateCommandBuffers(VkDevice _device,
const VkCommandBufferAllocateInfo *pAllocateInfo,
VkCommandBuffer *pCommandBuffers)
return result;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_FreeCommandBuffers(VkDevice device,
VkCommandPool commandPool,
uint32_t commandBufferCount,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyCommandPool(VkDevice _device,
VkCommandPool commandPool,
const VkAllocationCallbacks *pAllocator)
cmd_buffer_destroy(cmd_buffer);
}
- vk_free2(&device->alloc, pAllocator, pool);
+ vk_object_free(&device->vk, pAllocator, pool);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_TrimCommandPool(VkDevice device,
+ VkCommandPool commandPool,
+ VkCommandPoolTrimFlags flags)
+{
+ /* We don't need to do anything here, our command pools never hold on to
+ * any resources from command buffers that are freed or reset.
+ */
+}
+
+
+static void
+cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
+ const struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ const struct v3dv_subpass *subpass =
+ &pass->subpasses[cmd_buffer->state.subpass_idx];
+
+ if (!subpass->resolve_attachments)
+ return;
+
+ struct v3dv_framebuffer *fb = cmd_buffer->state.framebuffer;
+
+ /* At this point we have already ended the current subpass and now we are
+ * about to emit vkCmdResolveImage calls to get the resolves we can't
+ * handle in the subpass RCL.
+ *
+ * vkCmdResolveImage is not supposed to be called inside a render pass, so
+ * before we call it we need to make sure our command buffer state reflects
+ * that we are no longer in a subpass: we finish the current job and
+ * temporarily reset the framebuffer and render pass state, restoring it
+ * after we are done with the resolves.
+ */
+ if (cmd_buffer->state.job)
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+ struct v3dv_framebuffer *restore_fb = cmd_buffer->state.framebuffer;
+ struct v3dv_render_pass *restore_pass = cmd_buffer->state.pass;
+ uint32_t restore_subpass_idx = cmd_buffer->state.subpass_idx;
+ cmd_buffer->state.framebuffer = NULL;
+ cmd_buffer->state.pass = NULL;
+ cmd_buffer->state.subpass_idx = -1;
+
+ VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ const uint32_t src_attachment_idx =
+ subpass->color_attachments[i].attachment;
+ if (src_attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
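+ /* Attachments that support TLB resolves have already been resolved as
+ * part of the subpass RCL, so skip them here.
+ */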
+ if (pass->attachments[src_attachment_idx].use_tlb_resolve)
+ continue;
+
+ const uint32_t dst_attachment_idx =
+ subpass->resolve_attachments[i].attachment;
+ if (dst_attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct v3dv_image_view *src_iview = fb->attachments[src_attachment_idx];
+ struct v3dv_image_view *dst_iview = fb->attachments[dst_attachment_idx];
+
+ VkImageResolve2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
+ .srcSubresource = {
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ src_iview->vk.base_mip_level,
+ src_iview->vk.base_array_layer,
+ src_iview->vk.layer_count,
+ },
+ .srcOffset = { 0, 0, 0 },
+ .dstSubresource = {
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ dst_iview->vk.base_mip_level,
+ dst_iview->vk.base_array_layer,
+ dst_iview->vk.layer_count,
+ },
+ .dstOffset = { 0, 0, 0 },
+ .extent = src_iview->vk.image->extent,
+ };
+
+ struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image;
+ struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image;
+ VkResolveImageInfo2KHR resolve_info = {
+ .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
+ .srcImage = v3dv_image_to_handle(src_image),
+ .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .dstImage = v3dv_image_to_handle(dst_image),
+ .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .regionCount = 1,
+ .pRegions = &region,
+ };
+ v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info);
+ }
+
+ cmd_buffer->state.framebuffer = restore_fb;
+ cmd_buffer->state.pass = restore_pass;
+ cmd_buffer->state.subpass_idx = restore_subpass_idx;
}
static VkResult
cmd_buffer->state.framebuffer =
v3dv_framebuffer_from_handle(inheritance_info->framebuffer);
+ assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
+ cmd_buffer->state.subpass_idx = inheritance_info->subpass;
+
+ cmd_buffer->state.inheritance.occlusion_query_enable =
+ inheritance_info->occlusionQueryEnable;
+
/* Secondaries that execute inside a render pass won't start subpasses
* so we want to create a job for them here.
*/
- assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, inheritance_info->subpass,
V3DV_JOB_TYPE_GPU_CL_SECONDARY);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
- cmd_buffer->state.subpass_idx = inheritance_info->subpass;
-
/* Secondary command buffers don't know about the render area, but our
* scissor setup accounts for it, so let's make sure we make it large
* enough that it doesn't actually constrain any rendering. This should
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
{
if (result != VK_SUCCESS)
return result;
}
-
- /* If the primary may have an active occlusion query we need to honor
- * that in the secondary.
- */
- if (pBeginInfo->pInheritanceInfo->occlusionQueryEnable)
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer,
VkCommandBufferResetFlags flags)
{
return cmd_buffer_reset(cmd_buffer, flags);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandPool(VkDevice device,
VkCommandPool commandPool,
VkCommandPoolResetFlags flags)
return VK_SUCCESS;
}
-static void
-emit_clip_window(struct v3dv_job *job, const VkRect2D *rect)
-{
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, CLIP_WINDOW, clip) {
- clip.clip_window_left_pixel_coordinate = rect->offset.x;
- clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
- clip.clip_window_width_in_pixels = rect->extent.width;
- clip.clip_window_height_in_pixels = rect->extent.height;
- }
-}
-/* Checks whether the render area rectangle covers a region that is aligned to
- * tile boundaries, which means that for all tiles covered by the render area
- * region, there are no uncovered pixels (unless they are also outside the
- * framebuffer).
- */
static void
cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
{
* always have framebuffer information available.
*/
assert(cmd_buffer->state.framebuffer);
-
- const VkExtent2D fb_extent = {
- .width = cmd_buffer->state.framebuffer->width,
- .height = cmd_buffer->state.framebuffer->height
- };
-
- VkExtent2D granularity;
- v3dv_subpass_get_granularity(cmd_buffer->state.pass,
- cmd_buffer->state.subpass_idx,
- &granularity);
-
cmd_buffer->state.tile_aligned_render_area =
- rect->offset.x % granularity.width == 0 &&
- rect->offset.y % granularity.height == 0 &&
- (rect->extent.width % granularity.width == 0 ||
- rect->offset.x + rect->extent.width >= fb_extent.width) &&
- (rect->extent.height % granularity.height == 0 ||
- rect->offset.y + rect->extent.height >= fb_extent.height);
+ v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect,
+ cmd_buffer->state.framebuffer,
+ cmd_buffer->state.pass,
+ cmd_buffer->state.subpass_idx);
if (!cmd_buffer->state.tile_aligned_render_area) {
perf_debug("Render area for subpass %d of render pass %p doesn't "
}
}
-void
-v3dv_get_hw_clear_color(const VkClearColorValue *color,
- uint32_t internal_type,
- uint32_t internal_size,
- uint32_t *hw_color)
-{
- union util_color uc;
- switch (internal_type) {
- case V3D_INTERNAL_TYPE_8:
- util_pack_color(color->float32, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- memcpy(hw_color, uc.ui, internal_size);
- break;
- case V3D_INTERNAL_TYPE_8I:
- case V3D_INTERNAL_TYPE_8UI:
- hw_color[0] = ((color->uint32[0] & 0xff) |
- (color->uint32[1] & 0xff) << 8 |
- (color->uint32[2] & 0xff) << 16 |
- (color->uint32[3] & 0xff) << 24);
- break;
- case V3D_INTERNAL_TYPE_16F:
- util_pack_color(color->float32, PIPE_FORMAT_R16G16B16A16_FLOAT, &uc);
- memcpy(hw_color, uc.ui, internal_size);
- break;
- case V3D_INTERNAL_TYPE_16I:
- case V3D_INTERNAL_TYPE_16UI:
- hw_color[0] = ((color->uint32[0] & 0xffff) | color->uint32[1] << 16);
- hw_color[1] = ((color->uint32[2] & 0xffff) | color->uint32[3] << 16);
- break;
- case V3D_INTERNAL_TYPE_32F:
- case V3D_INTERNAL_TYPE_32I:
- case V3D_INTERNAL_TYPE_32UI:
- memcpy(hw_color, color->uint32, internal_size);
- break;
- }
-}
-
static void
cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t attachment_idx,
&cmd_buffer->state.pass->attachments[attachment_idx];
uint32_t internal_type, internal_bpp;
- const struct v3dv_format *format = v3dv_get_format(attachment->desc.format);
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
- &internal_type,
- &internal_bpp);
+ const struct v3dv_format *format =
+ v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format);
+
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format)
+ (format->rt_type, &internal_type, &internal_bpp);
uint32_t internal_size = 4 << internal_bpp;
struct v3dv_cmd_buffer_attachment_state *attachment_state =
&cmd_buffer->state.attachments[attachment_idx];
- v3dv_get_hw_clear_color(color, internal_type, internal_size,
- &attachment_state->clear_value.color[0]);
+ v3dv_X(cmd_buffer->device, get_hw_clear_color)
+ (color, internal_type, internal_size, &attachment_state->clear_value.color[0]);
attachment_state->vk_clear_value.color = *color;
}
if (state->attachment_alloc_count < pass->attachment_count) {
if (state->attachments > 0) {
assert(state->attachment_alloc_count > 0);
- vk_free(&cmd_buffer->device->alloc, state->attachments);
+ vk_free(&cmd_buffer->device->vk.alloc, state->attachments);
}
uint32_t size = sizeof(struct v3dv_cmd_buffer_attachment_state) *
pass->attachment_count;
- state->attachments = vk_zalloc(&cmd_buffer->device->alloc, size, 8,
+ state->attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!state->attachments) {
v3dv_flag_oom(cmd_buffer, NULL);
assert(state->attachment_alloc_count >= pass->attachment_count);
}
-void
-v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- VkSubpassContents contents)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass);
* to emit a new clip window to constraint it to the render area.
*/
uint32_t min_render_x = state->render_area.offset.x;
- uint32_t min_render_y = state->render_area.offset.x;
+ uint32_t min_render_y = state->render_area.offset.y;
uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1;
uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1;
uint32_t min_clip_x = state->clip_window.offset.x;
v3dv_cmd_buffer_subpass_start(cmd_buffer, 0);
}
-void
-v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+ const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
/* Finish the previous subpass */
v3dv_cmd_buffer_subpass_finish(cmd_buffer);
+ cmd_buffer_subpass_handle_pending_resolves(cmd_buffer);
/* Start the next subpass */
v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1);
}
-void
-v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
- int rt,
- uint32_t *rt_bpp,
- uint32_t *rt_type,
- uint32_t *rt_clamp)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- if (rt >= subpass->color_count)
- return;
-
- struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
- const uint32_t attachment_idx = attachment->attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- return;
-
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- assert(attachment_idx < framebuffer->attachment_count);
- struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
- assert(iview->aspects & VK_IMAGE_ASPECT_COLOR_BIT);
-
- *rt_bpp = iview->internal_bpp;
- *rt_type = iview->internal_type;
- *rt_clamp =vk_format_is_int(iview->vk_format) ?
- V3D_RENDER_TARGET_CLAMP_INT : V3D_RENDER_TARGET_CLAMP_NONE;
-}
-
static void
-cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- struct v3dv_image_view *iview,
- uint32_t layer,
- uint32_t buffer)
+cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
{
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
- uint32_t layer_offset = v3dv_layer_offset(image,
- iview->base_level,
- iview->first_layer + layer);
-
- cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
- load.buffer_to_load = buffer;
- load.address = v3dv_cl_address(image->mem->bo, layer_offset);
-
- load.input_image_format = iview->format->rt_type;
- load.r_b_swap = iview->swap_rb;
- load.memory_format = slice->tiling;
-
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- load.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- load.height_in_ub_or_stride = slice->stride;
- }
-
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else
- load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-static void
-cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t layer)
-{
+ assert(cmd_buffer->state.pass);
+ assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
const struct v3dv_render_pass *pass = state->pass;
const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+ /* We only need to emit subpass clears as draw calls when the render
+ * area is not aligned to tile boundaries or for GFXH-1461.
+ */
+ if (cmd_buffer->state.tile_aligned_render_area &&
+ !subpass->do_depth_clear_with_draw &&
+ !subpass->do_stencil_clear_with_draw) {
+ return;
+ }
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
+ uint32_t att_count = 0;
+ VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
- const struct v3dv_render_pass_attachment *attachment =
- &state->pass->attachments[attachment_idx];
+ /* We only need to emit subpass clears as draw calls for color attachments
+ * if the render area is not aligned to tile boundaries.
+ */
+ if (!cmd_buffer->state.tile_aligned_render_area) {
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ const uint32_t att_idx = subpass->color_attachments[i].attachment;
+ if (att_idx == VK_ATTACHMENT_UNUSED)
+ continue;
- /* According to the Vulkan spec:
- *
- * "The load operation for each sample in an attachment happens before
- * any recorded command which accesses the sample in the first subpass
- * where the attachment is used."
- *
- * If the load operation is CLEAR, we must only clear once on the first
- * subpass that uses the attachment (and in that case we don't LOAD).
- * After that, we always want to load so we don't lose any rendering done
- * by a previous subpass to the same attachment. We also want to load
- * if the current job is continuing subpass work started by a previous
- * job, for the same reason.
- *
- * If the render area is not aligned to tile boundaries then we have
- * tiles which are partially covered by it. In this case, we need to
- * load the tiles so we can preserve the pixels that are outside the
- * render area for any such tiles.
- */
- assert(state->job->first_subpass >= attachment->first_subpass);
- bool needs_load =
- state->job->first_subpass > attachment->first_subpass ||
- state->job->is_subpass_continue ||
- attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
- !state->tile_aligned_render_area;
-
- if (needs_load) {
- struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
- cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview,
- layer, RENDER_TARGET_0 + i);
+ struct v3dv_render_pass_attachment *att = &pass->attachments[att_idx];
+ if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
+ continue;
+
+ if (state->subpass_idx != att->first_subpass)
+ continue;
+
+ atts[att_count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ atts[att_count].colorAttachment = i;
+ atts[att_count].clearValue = state->attachments[att_idx].vk_clear_value;
+ att_count++;
}
}
- uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_render_pass_attachment *ds_attachment =
- &state->pass->attachments[ds_attachment_idx];
-
- assert(state->job->first_subpass >= ds_attachment->first_subpass);
- const bool might_need_load =
- state->job->first_subpass > ds_attachment->first_subpass ||
- state->job->is_subpass_continue ||
- !state->tile_aligned_render_area;
-
- const bool needs_depth_load =
- vk_format_has_depth(ds_attachment->desc.format) &&
- (ds_attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
- might_need_load);
-
- const bool needs_stencil_load =
- vk_format_has_stencil(ds_attachment->desc.format) &&
- (ds_attachment->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
- might_need_load);
-
- if (needs_depth_load || needs_stencil_load) {
- struct v3dv_image_view *iview =
- framebuffer->attachments[ds_attachment_idx];
- /* From the Vulkan spec:
- *
- * "When an image view of a depth/stencil image is used as a
- * depth/stencil framebuffer attachment, the aspectMask is ignored
- * and both depth and stencil image subresources are used."
- *
- * So we ignore the aspects from the subresource range of the image
- * view for the depth/stencil attachment, but we still need to restrict
- * the to aspects compatible with the render pass and the image.
- */
- const uint32_t zs_buffer =
- v3dv_zs_buffer(needs_depth_load, needs_stencil_load);
- cmd_buffer_render_pass_emit_load(cmd_buffer, cl,
- iview, layer, zs_buffer);
+ /* For D/S we may also need to emit a subpass clear for GFXH-1461 */
+ const uint32_t ds_att_idx = subpass->ds_attachment.attachment;
+ if (ds_att_idx != VK_ATTACHMENT_UNUSED) {
+ struct v3dv_render_pass_attachment *att = &pass->attachments[ds_att_idx];
+ if (state->subpass_idx == att->first_subpass) {
+ VkImageAspectFlags aspects = vk_format_aspects(att->desc.format);
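+ /* Drop an aspect if it is not cleared on load, or if the TLB can do
+ * the clear for us (tile-aligned render area and not affected by
+ * GFXH-1461).
+ */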
+ if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
+ (cmd_buffer->state.tile_aligned_render_area &&
+ !subpass->do_depth_clear_with_draw)) {
+ aspects &= ~VK_IMAGE_ASPECT_DEPTH_BIT;
+ }
+ if (att->desc.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_CLEAR ||
+ (cmd_buffer->state.tile_aligned_render_area &&
+ !subpass->do_stencil_clear_with_draw)) {
+ aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ if (aspects) {
+ atts[att_count].aspectMask = aspects;
+ atts[att_count].colorAttachment = 0; /* Ignored */
+ atts[att_count].clearValue =
+ state->attachments[ds_att_idx].vk_clear_value;
+ att_count++;
+ }
}
}
- cl_emit(cl, END_OF_LOADS, end);
-}
-
-static void
-cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t attachment_idx,
- uint32_t layer,
- uint32_t buffer,
- bool clear)
-{
- const struct v3dv_image_view *iview =
- cmd_buffer->state.framebuffer->attachments[attachment_idx];
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
- uint32_t layer_offset = v3dv_layer_offset(image,
- iview->base_level,
- iview->first_layer + layer);
-
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = buffer;
- store.address = v3dv_cl_address(image->mem->bo, layer_offset);
- store.clear_buffer_being_stored = clear;
-
- store.output_image_format = iview->format->rt_type;
- store.r_b_swap = iview->swap_rb;
- store.memory_format = slice->tiling;
-
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- store.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- store.height_in_ub_or_stride = slice->stride;
- }
+ if (att_count == 0)
+ return;
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else
- store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ if (!cmd_buffer->state.tile_aligned_render_area) {
+ perf_debug("Render area doesn't match render pass granularity, falling "
+ "back to vkCmdClearAttachments for "
+ "VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
+ } else if (subpass->do_depth_clear_with_draw ||
+ subpass->do_stencil_clear_with_draw) {
+ perf_debug("Subpass clears DEPTH but loads STENCIL (or viceversa), "
+ "falling back to vkCmdClearAttachments for "
+ "VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
}
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * "VK_ATTACHMENT_LOAD_OP_CLEAR specifies that the contents within the
+ * render area will be cleared to a uniform value, which is specified
+ * when a render pass instance is begun."
+ *
+ * So the clear is only constrained by the render area and not by pipeline
+ * state such as scissor or viewport; these are the semantics of
+ * vkCmdClearAttachments as well.
+ */
+ VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
+ VkClearRect rect = {
+ .rect = state->render_area,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ };
+ v3dv_CmdClearAttachments(_cmd_buffer, att_count, atts, 1, &rect);
}
-static void
-cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t layer)
+static struct v3dv_job *
+cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t subpass_idx,
+ enum v3dv_job_type type)
{
+ assert(type == V3DV_JOB_TYPE_GPU_CL ||
+ type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
+
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
+ assert(subpass_idx < state->pass->subpass_count);
+
+ /* Starting a new job can trigger a finish of the current one, so don't
+ * change the command buffer state for the new job until we are done creating
+ * the new job.
+ */
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_start_job(cmd_buffer, subpass_idx, type);
+ if (!job)
+ return NULL;
+
+ state->subpass_idx = subpass_idx;
- bool has_stores = false;
- bool use_global_clear = false;
+ /* If we are starting a new job we need to setup binning. We only do this
+ * for V3DV_JOB_TYPE_GPU_CL jobs because V3DV_JOB_TYPE_GPU_CL_SECONDARY
+ * jobs are not submitted to the GPU directly, and are instead meant to be
+ * branched to from other V3DV_JOB_TYPE_GPU_CL jobs.
+ */
+ if (type == V3DV_JOB_TYPE_GPU_CL &&
+ job->first_subpass == state->subpass_idx) {
+ const struct v3dv_subpass *subpass =
+ &state->pass->subpasses[state->subpass_idx];
- /* FIXME: separate stencil */
- uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_render_pass_attachment *ds_attachment =
- &state->pass->attachments[ds_attachment_idx];
+ const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- assert(state->job->first_subpass >= ds_attachment->first_subpass);
- assert(state->subpass_idx >= ds_attachment->first_subpass);
- assert(state->subpass_idx <= ds_attachment->last_subpass);
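+ /* Tile sizing depends on the maximum internal bpp across all the
+ * subpass attachments and on whether any of them uses MSAA.
+ */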
+ uint8_t internal_bpp;
+ bool msaa;
+ v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
+ (framebuffer, subpass, &internal_bpp, &msaa);
- /* From the Vulkan spec, VkImageSubresourceRange:
- *
- * "When an image view of a depth/stencil image is used as a
- * depth/stencil framebuffer attachment, the aspectMask is ignored
- * and both depth and stencil image subresources are used."
- *
- * So we ignore the aspects from the subresource range of the image
- * view for the depth/stencil attachment, but we still need to restrict
- * the to aspects compatible with the render pass and the image.
- */
- const VkImageAspectFlags aspects =
- vk_format_aspects(ds_attachment->desc.format);
-
- /* Only clear once on the first subpass that uses the attachment */
- bool needs_depth_clear =
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- state->tile_aligned_render_area &&
- state->job->first_subpass == ds_attachment->first_subpass &&
- ds_attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
- !state->job->is_subpass_continue;
-
- bool needs_stencil_clear =
- (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- state->tile_aligned_render_area &&
- state->job->first_subpass == ds_attachment->first_subpass &&
- ds_attachment->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
- !state->job->is_subpass_continue;
-
- /* Skip the last store if it is not required */
- bool needs_depth_store =
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- (state->subpass_idx < ds_attachment->last_subpass ||
- ds_attachment->desc.storeOp == VK_ATTACHMENT_STORE_OP_STORE ||
- !state->job->is_subpass_finish);
-
- bool needs_stencil_store =
- (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- (state->subpass_idx < ds_attachment->last_subpass ||
- ds_attachment->desc.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE ||
- !state->job->is_subpass_finish);
-
- /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
- * buffer bit is broken for depth/stencil. In addition, the
- * clear packet's Z/S bit is broken, but the RTs bit ends up
- * clearing Z/S.
+ /* From the Vulkan spec:
*
- * So if we have to emit a clear of depth or stencil we don't use
- * per-buffer clears, not even for color, since we will have to emit
- * a clear command for all tile buffers (including color) to handle
- * the depth/stencil clears.
+ * "If the render pass uses multiview, then layers must be one and
+ * each attachment requires a number of layers that is greater than
+ * the maximum bit index set in the view mask in the subpasses in
+ * which it is used."
*
- * Note that this bug is not reproduced in the simulator, where
- * using the clear buffer bit in depth/stencil stores seems to work
- * correctly.
+ * So when multiview is enabled, we take the number of layers from the
+ * last bit set in the view mask.
*/
- use_global_clear = needs_depth_clear || needs_stencil_clear;
- if (needs_depth_store || needs_stencil_store) {
- const uint32_t zs_buffer =
- v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
- cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
- ds_attachment_idx, layer,
- zs_buffer, false);
- has_stores = true;
+ uint32_t layers = framebuffer->layers;
+ if (subpass->view_mask != 0) {
+ assert(framebuffer->layers == 1);
+ layers = util_last_bit(subpass->view_mask);
}
- }
-
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
-
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
- const struct v3dv_render_pass_attachment *attachment =
- &state->pass->attachments[attachment_idx];
-
- assert(state->job->first_subpass >= attachment->first_subpass);
- assert(state->subpass_idx >= attachment->first_subpass);
- assert(state->subpass_idx <= attachment->last_subpass);
-
- /* Only clear once on the first subpass that uses the attachment */
- bool needs_clear =
- state->tile_aligned_render_area &&
- state->job->first_subpass == attachment->first_subpass &&
- attachment->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
- !state->job->is_subpass_continue;
-
- /* Skip the last store if it is not required */
- bool needs_store =
- state->subpass_idx < attachment->last_subpass ||
- attachment->desc.storeOp == VK_ATTACHMENT_STORE_OP_STORE ||
- !state->job->is_subpass_finish;
-
- if (needs_store) {
- cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
- attachment_idx, layer,
- RENDER_TARGET_0 + i,
- needs_clear && !use_global_clear);
- has_stores = true;
- } else if (needs_clear) {
- use_global_clear = true;
- }
- }
-
- /* We always need to emit at least one dummy store */
- if (!has_stores) {
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- }
-
- /* If we have any depth/stencil clears we can't use the per-buffer clear
- * bit and instead we have to emit a single clear of all tile buffers.
- */
- if (use_global_clear) {
- cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = true;
- clear.clear_all_render_targets = true;
- }
- }
-}
-
-static void
-cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t layer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- /* Emit the generic list in our indirect state -- the rcl will just
- * have pointers into it.
- */
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer);
-
- /* The binner starts out writing tiles assuming that the initial mode
- * is triangles, so make sure that's the case.
- */
- cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
- fmt.primitive_type = LIST_TRIANGLES;
- }
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t layer)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
- struct v3dv_job *job = cmd_buffer->state.job;
- struct v3dv_cl *rcl = &job->rcl;
-
- /* If doing multicore binning, we would need to initialize each
- * core's tile list here.
- */
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
- const uint32_t tile_alloc_offset =
- 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = tiling->draw_tiles_x;
- config.total_frame_height_in_tiles = tiling->draw_tiles_y;
-
- config.supertile_width_in_tiles = tiling->supertile_width;
- config.supertile_height_in_tiles = tiling->supertile_height;
-
- config.total_frame_width_in_supertiles =
- tiling->frame_width_in_supertiles;
- config.total_frame_height_in_supertiles =
- tiling->frame_height_in_supertiles;
- }
-
- /* Start by clearing the tile buffer. */
- cl_emit(rcl, TILE_COORDINATES, coords) {
- coords.tile_column_number = 0;
- coords.tile_row_number = 0;
- }
-
- /* Emit an initial clear of the tile buffers. This is necessary
- * for any buffers that should be cleared (since clearing
- * normally happens at the *end* of the generic tile list), but
- * it's also nice to clear everything so the first tile doesn't
- * inherit any contents from some previous frame.
- *
- * Also, implement the GFXH-1742 workaround. There's a race in
- * the HW between the RCL updating the TLB's internal type/size
- * and the spawning of the QPU instances using the TLB's current
- * internal type/size. To make sure the QPUs get the right
- * state, we need 1 dummy store in between internal type/size
- * changes on V3D 3.x, and 2 dummy stores on 4.x.
- */
- for (int i = 0; i < 2; i++) {
- if (i > 0)
- cl_emit(rcl, TILE_COORDINATES, coords);
- cl_emit(rcl, END_OF_LOADS, end);
- cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
- cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = true;
- clear.clear_all_render_targets = true;
- }
- }
- cl_emit(rcl, END_OF_TILE_MARKER, end);
- }
-
- cl_emit(rcl, FLUSH_VCD_CACHE, flush);
-
- cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);
-
- uint32_t supertile_w_in_pixels =
- tiling->tile_width * tiling->supertile_width;
- uint32_t supertile_h_in_pixels =
- tiling->tile_height * tiling->supertile_height;
- const uint32_t min_x_supertile =
- state->render_area.offset.x / supertile_w_in_pixels;
- const uint32_t min_y_supertile =
- state->render_area.offset.y / supertile_h_in_pixels;
-
- uint32_t max_render_x = state->render_area.offset.x;
- if (state->render_area.extent.width > 0)
- max_render_x += state->render_area.extent.width - 1;
- uint32_t max_render_y = state->render_area.offset.y;
- if (state->render_area.extent.height > 0)
- max_render_y += state->render_area.extent.height - 1;
- const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
- const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
-
- for (int y = min_y_supertile; y <= max_y_supertile; y++) {
- for (int x = min_x_supertile; x <= max_x_supertile; x++) {
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = x;
- coords.row_number_in_supertiles = y;
- }
- }
- }
-}
-
-static void
-set_rcl_early_z_config(struct v3dv_job *job,
- bool *early_z_disable,
- uint32_t *early_z_test_and_update_direction)
-{
- switch (job->first_ez_state) {
- case VC5_EZ_UNDECIDED:
- case VC5_EZ_LT_LE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
- break;
- case VC5_EZ_GT_GE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
- break;
- case VC5_EZ_DISABLED:
- *early_z_disable = true;
- break;
- }
-}
-
-static void
-cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
-
- /* We can't emit the RCL until we have a framebuffer, which we may not have
- * if we are recording a secondary command buffer. In that case, we will
- * have to wait until vkCmdExecuteCommands is called from a primary command
- * buffer.
- */
- if (!framebuffer) {
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
- return;
- }
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- const uint32_t fb_layers = framebuffer->layers;
- v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
- MAX2(fb_layers, 1) * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- struct v3dv_cl *rcl = &job->rcl;
-
- /* Comon config must be the first TILE_RENDERING_MODE_CFG and
- * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
- * updates to the previous HW state.
- */
- const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.image_width_pixels = framebuffer->width;
- config.image_height_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(subpass->color_count, 1);
- config.multisample_mode_4x = false; /* FIXME */
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_image_view *iview =
- framebuffer->attachments[ds_attachment_idx];
- config.internal_depth_type = iview->internal_type;
- }
-
- set_rcl_early_z_config(job,
- &config.early_z_disable,
- &config.early_z_test_and_update_direction);
- }
-
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct v3dv_image_view *iview =
- state->framebuffer->attachments[attachment_idx];
-
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
-
- /* FIXME */
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
-
- const uint32_t *clear_color =
- &state->attachments[attachment_idx].clear_value.color[0];
-
- uint32_t clear_pad = 0;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- int uif_block_height = v3d_utile_height(image->cpp) * 2;
-
- uint32_t implicit_padded_height =
- align(framebuffer->height, uif_block_height) / uif_block_height;
-
- if (slice->padded_height_of_output_image_in_uif_blocks -
- implicit_padded_height >= 15) {
- clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
- clear.clear_color_low_32_bits = clear_color[0];
- clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
- clear.render_target_number = i;
- };
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
- clear.clear_color_mid_low_32_bits =
- ((clear_color[1] >> 24) | (clear_color[2] << 8));
- clear.clear_color_mid_high_24_bits =
- ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
- clear.render_target_number = i;
- };
- }
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
- clear.uif_padded_height_in_uif_blocks = clear_pad;
- clear.clear_color_high_16_bits = clear_color[3] >> 16;
- clear.render_target_number = i;
- };
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- v3dv_render_pass_setup_render_target(cmd_buffer, 0,
- &rt.render_target_0_internal_bpp,
- &rt.render_target_0_internal_type,
- &rt.render_target_0_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 1,
- &rt.render_target_1_internal_bpp,
- &rt.render_target_1_internal_type,
- &rt.render_target_1_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 2,
- &rt.render_target_2_internal_bpp,
- &rt.render_target_2_internal_type,
- &rt.render_target_2_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 3,
- &rt.render_target_3_internal_bpp,
- &rt.render_target_3_internal_type,
- &rt.render_target_3_clamp);
- }
-
- /* Ends rendering mode config. */
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value =
- state->attachments[ds_attachment_idx].clear_value.z;
- clear.stencil_clear_value =
- state->attachments[ds_attachment_idx].clear_value.s;
- };
- } else {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value = 1.0f;
- clear.stencil_clear_value = 0;
- };
- }
-
- /* Always set initial block size before the first branch, which needs
- * to match the value from binning mode config.
- */
- cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
- init.use_auto_chained_tile_lists = true;
- init.size_of_first_block_in_chained_tile_lists =
- TILE_ALLOCATION_BLOCK_SIZE_64B;
- }
-
- for (int layer = 0; layer < MAX2(1, fb_layers); layer++)
- cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer);
-
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-static void
-cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
-{
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-
- assert(cmd_buffer->state.pass);
- assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count);
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_render_pass *pass = state->pass;
- const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
-
- uint32_t att_count = 0;
- VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- const uint32_t att_idx = subpass->color_attachments[i].attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct v3dv_render_pass_attachment *att = &pass->attachments[att_idx];
- if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
- continue;
-
- if (state->subpass_idx != att->first_subpass)
- continue;
-
- atts[att_count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- atts[att_count].colorAttachment = i;
- atts[att_count].clearValue = state->attachments[att_idx].vk_clear_value;
- att_count++;
- }
-
- const uint32_t ds_att_idx = subpass->ds_attachment.attachment;
- if (ds_att_idx != VK_ATTACHMENT_UNUSED) {
- struct v3dv_render_pass_attachment *att = &pass->attachments[ds_att_idx];
- if (state->subpass_idx == att->first_subpass) {
- VkImageAspectFlags aspects = vk_format_aspects(att->desc.format);
- if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
- aspects &= ~VK_IMAGE_ASPECT_DEPTH_BIT;
- if (att->desc.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
- aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT;
-
- if (aspects) {
- atts[att_count].aspectMask = aspects;
- atts[att_count].colorAttachment = 0; /* Ignored */
- atts[att_count].clearValue =
- state->attachments[ds_att_idx].vk_clear_value;
- att_count++;
- }
- }
- }
-
- if (att_count == 0)
- return;
-
- perf_debug("Render area doesn't match render pass granularity, falling back "
- "to vkCmdClearAttachments for VK_ATTACHMENT_LOAD_OP_CLEAR.\n");
-
- /* From the Vulkan 1.0 spec:
- *
- * "VK_ATTACHMENT_LOAD_OP_CLEAR specifies that the contents within the
- * render area will be cleared to a uniform value, which is specified
- * when a render pass instance is begun."
- *
- * So the clear is only constrained by the render area and not by pipeline
- * state such as scissor or viewport; these are the semantics of
- * vkCmdClearAttachments as well.
- */
- VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
- VkClearRect rect = {
- .rect = state->render_area,
- .baseArrayLayer = 0,
- .layerCount = 1,
- };
- v3dv_CmdClearAttachments(_cmd_buffer, att_count, atts, 1, &rect);
-}
-
-static struct v3dv_job *
-cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t subpass_idx,
- enum v3dv_job_type type)
-{
- assert(type == V3DV_JOB_TYPE_GPU_CL ||
- type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- assert(subpass_idx < state->pass->subpass_count);
-
- /* Starting a new job can trigger a finish of the current one, so don't
- * change the command buffer state for the new job until we are done creating
- * the new job.
- */
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, subpass_idx, type);
- if (!job)
- return NULL;
-
- state->subpass_idx = subpass_idx;
-
- /* If we are starting a new job we need to setup binning. We only do this
- * for V3DV_JOB_TYPE_GPU_CL jobs because V3DV_JOB_TYPE_GPU_CL_SECONDARY
- * jobs are not submitted to the GPU directly, and are instead meant to be
- * branched to from other V3DV_JOB_TYPE_GPU_CL jobs.
- */
- if (type == V3DV_JOB_TYPE_GPU_CL &&
- job->first_subpass == state->subpass_idx) {
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
-
- const uint8_t internal_bpp =
- v3dv_framebuffer_compute_internal_bpp(framebuffer, subpass);
-
- v3dv_job_start_frame(job,
- framebuffer->width,
- framebuffer->height,
- framebuffer->layers,
- subpass->color_count,
- internal_bpp);
-
- /* FIXME: we don't support resolve attachments yet */
- assert(subpass->resolve_attachments == NULL);
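+ /* The tiling parameters used to start the frame (layers, render target
+ * count, internal bpp, msaa) need to be consistent with the rendering
+ * config emitted later in the RCL.
+ */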
+ v3dv_job_start_frame(job,
+ framebuffer->width,
+ framebuffer->height,
+ layers,
+ true,
+ subpass->color_count,
+ internal_bpp,
+ msaa);
}
return job;
cmd_buffer_update_tile_alignment(cmd_buffer);
/* If we can't use TLB clears then we need to emit draw clears for any
- * LOAD_OP_CLEAR attachments in this subpass now.
+ * LOAD_OP_CLEAR attachments in this subpass now. We might also need to emit
+ * Depth/Stencil clears if we hit GFXH-1461.
*
* Secondary command buffers don't start subpasses (and may not even have
* framebuffer state), so we only care about this in primaries. The only
* exception would be a secondary recording a meta operation (with its own
* render pass) that relies on
* attachment load clears, but we don't have any instances of that right
* now.
*/
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
- !cmd_buffer->state.tile_aligned_render_area) {
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
cmd_buffer_emit_subpass_clears(cmd_buffer);
- }
return job;
}
job->is_subpass_finish = true;
}
-void
-v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
v3dv_cmd_buffer_subpass_finish(cmd_buffer);
v3dv_cmd_buffer_finish_job(cmd_buffer);
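+
+ /* Now that all the jobs for the render pass are finished, handle any
+ * attachment resolves that are still pending from the last subpass.
+ */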
+ cmd_buffer_subpass_handle_pending_resolves(cmd_buffer);
+
/* We are no longer inside a render pass */
state->framebuffer = NULL;
state->pass = NULL;
state->subpass_idx = -1;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
}
static void
-emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
-
-static void
-ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t slot_size,
- uint32_t used_count,
- uint32_t *alloc_count,
- void **ptr);
-
-static void
-cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary,
- struct v3dv_cmd_buffer *secondary)
-{
- struct v3dv_cmd_buffer_state *p_state = &primary->state;
- struct v3dv_cmd_buffer_state *s_state = &secondary->state;
-
- const uint32_t total_state_count =
- p_state->query.end.used_count + s_state->query.end.used_count;
- ensure_array_state(primary,
- sizeof(struct v3dv_end_query_cpu_job_info),
- total_state_count,
- &p_state->query.end.alloc_count,
- (void **) &p_state->query.end.states);
- v3dv_return_if_oom(primary, NULL);
-
- for (uint32_t i = 0; i < s_state->query.end.used_count; i++) {
- const struct v3dv_end_query_cpu_job_info *s_qstate =
- &secondary->state.query.end.states[i];
-
- struct v3dv_end_query_cpu_job_info *p_qstate =
- &p_state->query.end.states[p_state->query.end.used_count++];
-
- p_qstate->pool = s_qstate->pool;
- p_qstate->query = s_qstate->query;
- }
-}
-
-static void
clone_bo_list(struct v3dv_cmd_buffer *cmd_buffer,
struct list_head *dst,
struct list_head *src)
list_inithead(dst);
list_for_each_entry(struct v3dv_bo, bo, src, list_link) {
struct v3dv_bo *clone_bo =
- vk_alloc(&cmd_buffer->device->alloc, sizeof(struct v3dv_bo), 8,
+ vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct v3dv_bo), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!clone_bo) {
v3dv_flag_oom(cmd_buffer, NULL);
* for jobs recorded in secondary command buffers when we want to execute
* them in primaries.
*/
-static void
-job_clone_in_cmd_buffer(struct v3dv_job *job,
- struct v3dv_cmd_buffer *cmd_buffer)
+struct v3dv_job *
+v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
+ struct v3dv_cmd_buffer *cmd_buffer)
{
- struct v3dv_job *clone_job = vk_alloc(&job->device->alloc,
+ struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc,
sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!clone_job) {
v3dv_flag_oom(cmd_buffer, NULL);
- return;
+ return NULL;
}
/* Cloned jobs don't duplicate resources! */
clone_bo_list(cmd_buffer, &clone_job->indirect.bo_list,
&job->indirect.bo_list);
}
-}
-static void
-cmd_buffer_execute_inside_pass(struct v3dv_cmd_buffer *primary,
- uint32_t cmd_buffer_count,
- const VkCommandBuffer *cmd_buffers)
-{
- assert(primary->state.job);
-
- if (primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY)
- emit_occlusion_query(primary);
-
- for (uint32_t i = 0; i < cmd_buffer_count; i++) {
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
-
- assert(secondary->usage_flags &
- VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
-
- list_for_each_entry(struct v3dv_job, secondary_job,
- &secondary->jobs, list_link) {
- if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
- /* If the job is a CL, then we branch to it from the primary BCL.
- * In this case the secondary's BCL is finished with a
- * RETURN_FROM_SUB_LIST command to return back to the primary BCL
- * once we are done executing it.
- */
- assert(v3dv_cl_offset(&secondary_job->rcl) == 0);
- assert(secondary_job->bcl.bo);
-
- /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */
- STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1);
- assert(v3dv_cl_offset(&secondary_job->bcl) >= 1);
- assert(*(((uint8_t *)secondary_job->bcl.next) - 1) ==
- V3D42_RETURN_FROM_SUB_LIST_opcode);
-
- /* If we had to split the primary job while executing the secondary
- * we will have to create a new one so we can emit the branch
- * instruction.
- *
- * FIXME: in this case, maybe just copy the secondary BCL without
- * the RETURN_FROM_SUB_LIST into the primary job to skip the
- * branch?
- */
- struct v3dv_job *primary_job = primary->state.job;
- if (!primary_job) {
- primary_job =
- v3dv_cmd_buffer_subpass_resume(primary,
- primary->state.subpass_idx);
- }
-
- /* Make sure our primary job has all required BO references */
- set_foreach(secondary_job->bos, entry) {
- struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
- v3dv_job_add_bo(primary_job, bo);
- }
-
- /* Emit the branch instruction */
- v3dv_cl_ensure_space_with_branch(&primary_job->bcl,
- cl_packet_length(BRANCH_TO_SUB_LIST));
- v3dv_return_if_oom(primary, NULL);
-
- cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) {
- branch.address = v3dv_cl_address(secondary_job->bcl.bo, 0);
- }
-
- /* If this secondary has barriers, we need to flag them in the
- * primary job.
- *
- * FIXME: This might be moving the sync point too early though,
- * maybe we would need to split the primary in this case to ensure
- * that barriers execute right before the secondary.
- */
- primary_job->serialize |= secondary_job->serialize;
- primary_job->needs_bcl_sync |= secondary_job->needs_bcl_sync;
- } else if (secondary_job->type == V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS) {
- const struct v3dv_clear_attachments_cpu_job_info *info =
- &secondary_job->cpu.clear_attachments;
- v3dv_CmdClearAttachments(v3dv_cmd_buffer_to_handle(primary),
- info->attachment_count,
- info->attachments,
- info->rect_count,
- info->rects);
- } else {
- /* This is a regular job (CPU or GPU), so just finish the current
- * primary job (if any) and then add the secondary job to the
- * primary's job list right after it.
- */
- v3dv_cmd_buffer_finish_job(primary);
- job_clone_in_cmd_buffer(secondary_job, primary);
- }
- }
-
- /* If the secondary has recorded any vkCmdEndQuery commands, we need to
- * copy this state to the primary so it is processed properly when the
- * current primary job is finished.
- */
- cmd_buffer_copy_secondary_end_query_state(primary, secondary);
- }
+ return clone_job;
}
static void
cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
uint32_t cmd_buffer_count,
const VkCommandBuffer *cmd_buffers)
{
+ bool pending_barrier = false;
+ bool pending_bcl_barrier = false;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
list_for_each_entry(struct v3dv_job, secondary_job,
&secondary->jobs, list_link) {
/* These can only happen inside a render pass */
- assert(secondary_job->type != V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS);
assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY);
- job_clone_in_cmd_buffer(secondary_job, primary);
+ struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
+ if (!job)
+ return;
+
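+ /* Consume barrier state deferred by the previous secondary: serialize
+ * this job and, if the barrier affected binning, sync its BCL too.
+ */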
+ if (pending_barrier) {
+ job->serialize = true;
+ if (pending_bcl_barrier)
+ job->needs_bcl_sync = true;
+ pending_barrier = false;
+ pending_bcl_barrier = false;
+ }
}
+
+ /* If this secondary had any pending barrier state we will need that
+ * barrier state consumed with whatever comes after it (first job in
+ * the next secondary or the primary, if this was the last secondary).
+ */
+ assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
+ pending_barrier = secondary->state.has_barrier;
+ pending_bcl_barrier = secondary->state.has_bcl_barrier;
+ }
+
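+ /* If the last secondary left barrier state pending, there is no later
+ * job in this call to consume it, so transfer it to the primary.
+ */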
+ if (pending_barrier) {
+ primary->state.has_barrier = true;
+ primary->state.has_bcl_barrier |= pending_bcl_barrier;
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
const VkCommandBuffer *pCommandBuffers)
V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer);
if (primary->state.pass != NULL) {
- cmd_buffer_execute_inside_pass(primary,
- commandBufferCount, pCommandBuffers);
+ v3dv_X(primary->device, cmd_buffer_execute_inside_pass)
+ (primary, commandBufferCount, pCommandBuffers);
} else {
cmd_buffer_execute_outside_pass(primary,
commandBufferCount, pCommandBuffers);
}
}
- cmd_buffer->state.dynamic.mask = dynamic_mask;
- cmd_buffer->state.dirty |= dirty;
-}
-
-static void
-job_update_ez_state(struct v3dv_job *job,
- struct v3dv_pipeline *pipeline,
- struct v3dv_cmd_buffer_state *state)
-{
- /* If we don't have a depth attachment at all, disable */
- if (!state->pass) {
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
-
- assert(state->subpass_idx < state->pass->subpass_count);
- struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
- if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
-
- /* Otherwise, look at the currently bound pipeline state */
- switch (pipeline->ez_state) {
- case VC5_EZ_UNDECIDED:
- /* If the pipeline didn't pick a direction but didn't disable, then go
- * along with the current EZ state. This allows EZ optimization for Z
- * func == EQUAL or NEVER.
- */
- break;
-
- case VC5_EZ_LT_LE:
- case VC5_EZ_GT_GE:
- /* If the pipeline picked a direction, then it needs to match the current
- * direction if we've decided on one.
- */
- if (job->ez_state == VC5_EZ_UNDECIDED)
- job->ez_state = pipeline->ez_state;
- else if (job->ez_state != pipeline->ez_state)
- job->ez_state = VC5_EZ_DISABLED;
- break;
-
- case VC5_EZ_DISABLED:
- /* If the pipeline disables EZ because of a bad Z func or stencil
- * operation, then we can't do any more EZ in this frame.
- */
- job->ez_state = VC5_EZ_DISABLED;
- break;
- }
-
- /* If the FS writes Z, then it may update against the chosen EZ direction */
- if (pipeline->fs->current_variant->prog_data.fs->writes_z)
- job->ez_state = VC5_EZ_DISABLED;
-
- if (job->first_ez_state == VC5_EZ_UNDECIDED &&
- job->ez_state != VC5_EZ_DISABLED) {
- job->first_ez_state = job->ez_state;
- }
-}
-
-/* Note that the following populate methods don't do a detailed fill-up of
- * the v3d_fs_key. Here we just fill up cmd_buffer specific info. All info
- * coming from the pipeline create info was already filled in when the
- * pipeline was created.
- */
-static void
-cmd_buffer_populate_v3d_key(struct v3d_key *key,
- struct v3dv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipeline_binding)
-{
- if (cmd_buffer->state.pipeline->combined_index_map != NULL) {
- struct v3dv_descriptor_map *texture_map = &cmd_buffer->state.pipeline->texture_map;
- struct v3dv_descriptor_map *sampler_map = &cmd_buffer->state.pipeline->sampler_map;
- struct v3dv_descriptor_state *descriptor_state =
- &cmd_buffer->state.descriptor_state[pipeline_binding];
-
- hash_table_foreach(cmd_buffer->state.pipeline->combined_index_map, entry) {
- uint32_t combined_idx = (uint32_t)(uintptr_t) (entry->data);
- uint32_t combined_idx_key =
- cmd_buffer->state.pipeline->combined_index_to_key_map[combined_idx];
- uint32_t texture_idx;
- uint32_t sampler_idx;
-
- v3dv_pipeline_combined_index_key_unpack(combined_idx_key,
- &texture_idx, &sampler_idx);
-
- struct v3dv_image_view *image_view =
- v3dv_descriptor_map_get_image_view(descriptor_state,
- texture_map,
- cmd_buffer->state.pipeline->layout,
- texture_idx);
-
- assert(image_view);
-
- const struct v3dv_sampler *sampler = NULL;
- if (sampler_idx != V3DV_NO_SAMPLER_IDX) {
- sampler =
- v3dv_descriptor_map_get_sampler(descriptor_state,
- sampler_map,
- cmd_buffer->state.pipeline->layout,
- sampler_idx);
- assert(sampler);
- }
-
- key->tex[combined_idx].return_size =
- v3dv_get_tex_return_size(image_view->format,
- sampler ? sampler->compare_enable : false);
-
- if (key->tex[combined_idx].return_size == 16) {
- key->tex[combined_idx].return_channels = 2;
- } else {
- key->tex[combined_idx].return_channels = 4;
- }
-
- /* Note: In general, we don't need to do anything for the swizzle, as
- * that is handled with the swizzle info at the Texture State, and the
- * default values for key->tex[].swizzle were already filled up at
- * the pipeline creation time.
- *
- * The only exception in which we want to apply a texture swizzle
- * lowering in the shader is to force alpha to 1 when using clamp
- * to border with transparent black in combination with specific
- * formats.
- */
- if (sampler && sampler->clamp_to_transparent_black_border) {
- switch (image_view->vk_format) {
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- key->tex[combined_idx].swizzle[3] = PIPE_SWIZZLE_1;
- break;
- default:
- break;
- }
- }
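+ /* Like the other static states above: only take the pipeline's color
+ * write enable when the state is not dynamic, and flag it dirty only if
+ * it actually changed.
+ */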
+ if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
+ if (dest->color_write_enable != src->color_write_enable) {
+ dest->color_write_enable = src->color_write_enable;
+ dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
}
}
-}
-
-static void
-update_fs_variant(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_shader_variant *variant;
- struct v3dv_pipeline_stage *p_stage = cmd_buffer->state.pipeline->fs;
- struct v3d_fs_key local_key;
-
- /* We start with a copy of the original pipeline key */
- memcpy(&local_key, &p_stage->key.fs, sizeof(struct v3d_fs_key));
-
- cmd_buffer_populate_v3d_key(&local_key.base, cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS);
-
- VkResult vk_result;
- variant = v3dv_get_shader_variant(p_stage, &local_key.base,
- sizeof(struct v3d_fs_key),
- &cmd_buffer->device->alloc,
- &vk_result);
- /* At this point we are not creating a vulkan object to return to the
- * API user, so we can't really return back a OOM error
- */
- assert(variant);
- assert(vk_result == VK_SUCCESS);
-
- p_stage->current_variant = variant;
-}
-
-static void
-update_vs_variant(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_shader_variant *variant;
- struct v3dv_pipeline_stage *p_stage;
- struct v3d_vs_key local_key;
- VkResult vk_result;
-
- /* We start with a copy of the original pipeline key */
- p_stage = cmd_buffer->state.pipeline->vs;
- memcpy(&local_key, &p_stage->key.vs, sizeof(struct v3d_vs_key));
-
- cmd_buffer_populate_v3d_key(&local_key.base, cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS);
-
- variant = v3dv_get_shader_variant(p_stage, &local_key.base,
- sizeof(struct v3d_vs_key),
- &cmd_buffer->device->alloc,
- &vk_result);
- /* At this point we are not creating a vulkan object to return to the
- * API user, so we can't really return back a OOM error
- */
- assert(variant);
- assert(vk_result == VK_SUCCESS);
-
- p_stage->current_variant = variant;
-
- /* Now the vs_bin */
- p_stage = cmd_buffer->state.pipeline->vs_bin;
- memcpy(&local_key, &p_stage->key.vs, sizeof(struct v3d_vs_key));
-
- cmd_buffer_populate_v3d_key(&local_key.base, cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS);
- variant = v3dv_get_shader_variant(p_stage, &local_key.base,
- sizeof(struct v3d_vs_key),
- &cmd_buffer->device->alloc,
- &vk_result);
-
- /* At this point we are not creating a vulkan object to return to the
- * API user, so we can't really return back a OOM error
- */
- assert(variant);
- assert(vk_result == VK_SUCCESS);
-
- p_stage->current_variant = variant;
-}
-
-static void
-update_cs_variant(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_shader_variant *variant;
- struct v3dv_pipeline_stage *p_stage = cmd_buffer->state.pipeline->cs;
- struct v3d_key local_key;
-
- /* We start with a copy of the original pipeline key */
- memcpy(&local_key, &p_stage->key.base, sizeof(struct v3d_key));
-
- cmd_buffer_populate_v3d_key(&local_key, cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE);
-
- VkResult result;
- variant = v3dv_get_shader_variant(p_stage, &local_key,
- sizeof(struct v3d_key),
- &cmd_buffer->device->alloc,
- &result);
- /* At this point we are not creating a vulkan object to return to the
- * API user, so we can't really return back a OOM error
- */
- assert(variant);
- assert(result == VK_SUCCESS);
-
- p_stage->current_variant = variant;
-}
-/*
- * Some updates on the cmd buffer also require updates on the shader being
- * compiled at the pipeline. The poster boy here is textures, as the compiler
- * needs to do certain things depending on the texture format. So here we
- * re-create the v3d_keys and update the variants. Note that internally the
- * pipeline has a variant cache (hash table) to avoid unneeded compilations.
- */
-static void
-update_pipeline_variants(struct v3dv_cmd_buffer *cmd_buffer)
-{
- assert(cmd_buffer->state.pipeline);
-
- if (v3dv_pipeline_get_binding_point(cmd_buffer->state.pipeline) ==
- VK_PIPELINE_BIND_POINT_GRAPHICS) {
- update_fs_variant(cmd_buffer);
- update_vs_variant(cmd_buffer);
- } else {
- update_cs_variant(cmd_buffer);
- }
+ cmd_buffer->state.dynamic.mask = dynamic_mask;
+ cmd_buffer->state.dirty |= dirty;
}
static void
bind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline)
{
assert(pipeline && !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
- if (cmd_buffer->state.pipeline == pipeline)
+ if (cmd_buffer->state.gfx.pipeline == pipeline)
return;
-
- /* Enable always flush if we are blending to sRGB render targets. This
- * fixes test failures in:
- * dEQP-VK.pipeline.blend.format.r8g8b8a8_srgb.*
- *
- * FIXME: not sure why we need this. The tile buffer is always linear, with
- * conversion from/to sRGB happening on tile load/store operations. This
- * means that when we enable flushing the only difference is that we convert
- * to sRGB on the store after each draw call and we convert from sRGB on the
- * load before each draw call, but the blend happens in linear format in the
- * tile buffer anyway, which is the same scenario as if we didn't flush.
- */
- assert(pipeline->subpass);
- if (pipeline->subpass->has_srgb_rt && pipeline->blend.enables) {
- assert(cmd_buffer->state.job);
- cmd_buffer->state.job->always_flush = true;
- perf_debug("flushing draw calls for subpass %d because bound pipeline "
- "uses sRGB blending\n", cmd_buffer->state.subpass_idx);
- }
-
- cmd_buffer->state.pipeline = pipeline;
+ cmd_buffer->state.gfx.pipeline = pipeline;
cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state);
static void
bind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline)
{
assert(pipeline && pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
- if (cmd_buffer->state.pipeline == pipeline)
+ if (cmd_buffer->state.compute.pipeline == pipeline)
return;
- cmd_buffer->state.pipeline = pipeline;
- cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.compute.pipeline = pipeline;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetViewport(VkCommandBuffer commandBuffer,
uint32_t firstViewport,
uint32_t viewportCount,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetScissor(VkCommandBuffer commandBuffer,
uint32_t firstScissor,
uint32_t scissorCount,
cmd_buffer->state.clip_window.extent.width = maxx - minx;
cmd_buffer->state.clip_window.extent.height = maxy - miny;
- emit_clip_window(cmd_buffer->state.job, &cmd_buffer->state.clip_window);
+ v3dv_X(cmd_buffer->device, job_emit_clip_window)
+ (cmd_buffer->state.job, &cmd_buffer->state.clip_window);
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR;
}
static void
-emit_viewport(struct v3dv_cmd_buffer *cmd_buffer)
+update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t dirty_uniform_state)
{
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- /* FIXME: right now we only support one viewport. viewports[0] would work
- * for now; this would need to change if we allow multiple viewports.
+ /* We need to update uniform streams if any piece of state that is passed
+ * to the shader as a uniform may have changed.
+ *
+ * If only descriptor sets are dirty then we can safely ignore updates
+ * for shader stages that don't access descriptors.
*/
- float *vptranslate = dynamic->viewport.translate[0];
- float *vpscale = dynamic->viewport.scale[0];
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const uint32_t required_cl_size =
- cl_packet_length(CLIPPER_XY_SCALING) +
- cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
- cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
- cl_packet_length(VIEWPORT_OFFSET);
- v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
- clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
- clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
- }
-
- cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
- clip.viewport_z_offset_zc_to_zs = vptranslate[2];
- clip.viewport_z_scale_zc_to_zs = vpscale[2];
- }
- cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
- /* Vulkan's Z NDC is [0..1], unlike OpenGL which is [-1, 1] */
- float z1 = vptranslate[2];
- float z2 = vptranslate[2] + vpscale[2];
- clip.minimum_zw = MIN2(z1, z2);
- clip.maximum_zw = MAX2(z1, z2);
- }
-
- cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
- vp.viewport_centre_x_coordinate = vptranslate[0];
- vp.viewport_centre_y_coordinate = vptranslate[1];
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
-}
-
-static void
-emit_stencil(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;
-
- const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
- V3DV_DYNAMIC_STENCIL_WRITE_MASK |
- V3DV_DYNAMIC_STENCIL_REFERENCE;
-
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- 2 * cl_packet_length(STENCIL_CFG));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- bool emitted_stencil = false;
- for (uint32_t i = 0; i < 2; i++) {
- if (pipeline->emit_stencil_cfg[i]) {
- if (dynamic_state->mask & dynamic_stencil_states) {
- cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
- pipeline->stencil_cfg[i], config) {
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
- config.stencil_test_mask =
- i == 0 ? dynamic_state->stencil_compare_mask.front :
- dynamic_state->stencil_compare_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
- config.stencil_write_mask =
- i == 0 ? dynamic_state->stencil_write_mask.front :
- dynamic_state->stencil_write_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
- config.stencil_ref_value =
- i == 0 ? dynamic_state->stencil_reference.front :
- dynamic_state->stencil_reference.back;
- }
- }
- } else {
- cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]);
- }
-
- emitted_stencil = true;
- }
- }
-
- if (emitted_stencil) {
- const uint32_t dynamic_stencil_dirty_flags =
- V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
- V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
- V3DV_CMD_DIRTY_STENCIL_REFERENCE;
- cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags;
- }
-}
-
-static void
-emit_depth_bias(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- assert(pipeline);
-
- if (!pipeline->depth_bias.enabled)
- return;
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
- bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
- bias.depth_offset_units = dynamic->depth_bias.constant_factor;
- if (pipeline->depth_bias.is_z16)
- bias.depth_offset_units *= 256.0f;
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS;
-}
-
-static void
-emit_line_width(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, LINE_WIDTH, line) {
- line.line_width = cmd_buffer->state.dynamic.line_width;
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH;
-}
-
-static void
-emit_blend(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- assert(pipeline);
-
- const uint32_t blend_packets_size =
- cl_packet_length(BLEND_ENABLES) +
- cl_packet_length(BLEND_CONSTANT_COLOR) +
- cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS +
- cl_packet_length(COLOR_WRITE_MASKS);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
- if (pipeline->blend.enables) {
- cl_emit(&job->bcl, BLEND_ENABLES, enables) {
- enables.mask = pipeline->blend.enables;
- }
- }
-
- for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
- if (pipeline->blend.enables & (1 << i))
- cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
- }
-
- cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
- mask.mask = pipeline->blend.color_write_masks;
- }
- }
-
- if (pipeline->blend.needs_color_constants &&
- cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) {
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
- color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]);
- color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]);
- color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]);
- color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]);
- }
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS;
- }
-}
-
-static void
-emit_flat_shade_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(FLAT_SHADE_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.flat_shade_flags_for_varyings_v024 = varyings;
- flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower;
- flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static void
-emit_noperspective_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(NON_PERSPECTIVE_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.non_perspective_flags_for_varyings_v024 = varyings;
- flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower;
- flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static void
-emit_centroid_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(CENTROID_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.centroid_flags_for_varyings_v024 = varyings;
- flags.action_for_centroid_flags_of_lower_numbered_varyings = lower;
- flags.action_for_centroid_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static bool
-emit_varying_flags(struct v3dv_job *job,
- uint32_t num_flags,
- const uint32_t *flags,
- void (*flag_emit_callback)(struct v3dv_job *job,
- int varying_offset,
- uint32_t flags,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher))
-{
- bool emitted_any = false;
- for (int i = 0; i < num_flags; i++) {
- if (!flags[i])
- continue;
-
- if (emitted_any) {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_UNCHANGED,
- V3D_VARYING_FLAGS_ACTION_UNCHANGED);
- } else if (i == 0) {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_UNCHANGED,
- V3D_VARYING_FLAGS_ACTION_ZEROED);
- } else {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_ZEROED,
- V3D_VARYING_FLAGS_ACTION_ZEROED);
- }
-
- emitted_any = true;
- }
-
- return emitted_any;
-}
-
-static void
-emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
-
- struct v3d_fs_prog_data *prog_data_fs =
- pipeline->fs->current_variant->prog_data.fs;
-
- const uint32_t num_flags =
- ARRAY_SIZE(prog_data_fs->flat_shade_flags);
- const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
- const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags;
- const uint32_t *centroid_flags = prog_data_fs->centroid_flags;
-
- if (!emit_varying_flags(job, num_flags, flat_shade_flags,
- emit_flat_shade_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
- }
-
- if (!emit_varying_flags(job, num_flags, noperspective_flags,
- emit_noperspective_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
- }
-
- if (!emit_varying_flags(job, num_flags, centroid_flags,
- emit_centroid_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
- }
-}
-
-static void
-emit_configuration_bits(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
assert(pipeline);
- job_update_ez_state(job, pipeline, &cmd_buffer->state);
+ const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE;
+ const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT;
+ const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+ const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
+ const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX;
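+
+ /* Re-upload the uniform stream for each stage whose uniform inputs may
+ * have changed, as described above.
+ */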
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
- config.early_z_updates_enable = job->ez_state != VC5_EZ_DISABLED;
- config.early_z_enable = config.early_z_updates_enable;
- }
-}
-
-static void
-emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- struct v3dv_pipeline *pipeline = state->pipeline;
- assert(pipeline);
-
- /* Upload the uniforms to the indirect CL first */
- struct v3dv_cl_reloc fs_uniforms =
- v3dv_write_uniforms(cmd_buffer, pipeline->fs);
-
- struct v3dv_cl_reloc vs_uniforms =
- v3dv_write_uniforms(cmd_buffer, pipeline->vs);
-
- struct v3dv_cl_reloc vs_bin_uniforms =
- v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin);
-
- /* Update the cache dirty flag based on the shader progs data */
- job->tmu_dirty_rcl |= pipeline->vs_bin->current_variant->prog_data.vs->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= pipeline->vs->current_variant->prog_data.vs->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= pipeline->fs->current_variant->prog_data.fs->base.tmu_dirty_rcl;
+ /* VK_SHADER_STAGE_FRAGMENT_BIT */
+ const bool has_new_descriptors_fs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
- /* See GFXH-930 workaround below */
- uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
+ const bool has_new_push_constants_fs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
- uint32_t shader_rec_offset =
- v3dv_cl_ensure_space(&job->indirect,
- cl_packet_length(GL_SHADER_STATE_RECORD) +
- num_elements_to_emit *
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
- 32);
- v3dv_return_if_oom(cmd_buffer, NULL);
+ const bool needs_fs_update = has_new_pipeline ||
+ has_new_view_index ||
+ has_new_push_constants_fs ||
+ has_new_descriptors_fs;
- cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
- pipeline->shader_state_record, shader) {
+ if (needs_fs_update) {
+ struct v3dv_shader_variant *fs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
- /* FIXME: we are setting these values here and during the
- * prepacking. This is because both cl_emit_with_prepacked and v3dv_pack
- * assert for minimum values of these. It would be good to get
- * v3dv_pack to assert on the final value if possible.
- */
- shader.min_coord_shader_input_segments_required_in_play =
- pipeline->vpm_cfg_bin.As;
- shader.min_vertex_shader_input_segments_required_in_play =
- pipeline->vpm_cfg.As;
-
- shader.coordinate_shader_code_address =
- v3dv_cl_address(pipeline->vs_bin->current_variant->assembly_bo, 0);
- shader.vertex_shader_code_address =
- v3dv_cl_address(pipeline->vs->current_variant->assembly_bo, 0);
- shader.fragment_shader_code_address =
- v3dv_cl_address(pipeline->fs->current_variant->assembly_bo, 0);
-
- shader.coordinate_shader_uniforms_address = vs_bin_uniforms;
- shader.vertex_shader_uniforms_address = vs_uniforms;
- shader.fragment_shader_uniforms_address = fs_uniforms;
-
- shader.address_of_default_attribute_values =
- v3dv_cl_address(pipeline->default_attribute_values, 0);
+ cmd_buffer->state.uniforms.fs =
+ v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant);
}
- /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
- struct v3d_vs_prog_data *prog_data_vs =
- pipeline->vs->current_variant->prog_data.vs;
-
- struct v3d_vs_prog_data *prog_data_vs_bin =
- pipeline->vs_bin->current_variant->prog_data.vs;
-
- bool cs_loaded_any = false;
- const bool cs_uses_builtins = prog_data_vs_bin->uses_iid ||
- prog_data_vs_bin->uses_biid ||
- prog_data_vs_bin->uses_vid;
- const uint32_t packet_length =
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
-
- uint32_t emitted_va_count = 0;
- for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) {
- assert(i < MAX_VERTEX_ATTRIBS);
-
- if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED)
- continue;
-
- const uint32_t binding = pipeline->va[i].binding;
+ /* VK_SHADER_STAGE_GEOMETRY_BIT */
+ if (pipeline->has_gs) {
+ const bool has_new_descriptors_gs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages &
+ VK_SHADER_STAGE_GEOMETRY_BIT);
- /* We store each vertex attribute in the array using its driver location
- * as index.
- */
- const uint32_t location = i;
-
- struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
-
- cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
- &pipeline->vertex_attrs[i * packet_length], attr) {
-
- assert(c_vb->buffer->mem->bo);
- attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
- c_vb->buffer->mem_offset +
- pipeline->va[i].offset +
- c_vb->offset);
-
- attr.number_of_values_read_by_coordinate_shader =
- prog_data_vs_bin->vattr_sizes[location];
- attr.number_of_values_read_by_vertex_shader =
- prog_data_vs->vattr_sizes[location];
-
- /* GFXH-930: At least one attribute must be enabled and read by CS
- * and VS. If we have attributes being consumed by the VS but not
- * the CS, then set up a dummy load of the last attribute into the
- * CS's VPM inputs. (Since CS is just dead-code-elimination compared
- * to VS, we can't have CS loading but not VS).
- *
- * GFXH-1602: first attribute must be active if using builtins.
- */
- if (prog_data_vs_bin->vattr_sizes[location])
- cs_loaded_any = true;
-
- if (i == 0 && cs_uses_builtins && !cs_loaded_any) {
- attr.number_of_values_read_by_coordinate_shader = 1;
- cs_loaded_any = true;
- } else if (i == pipeline->va_count - 1 && !cs_loaded_any) {
- attr.number_of_values_read_by_coordinate_shader = 1;
- cs_loaded_any = true;
- }
+ const bool has_new_push_constants_gs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages &
+ VK_SHADER_STAGE_GEOMETRY_BIT);
- attr.maximum_index = 0xffffff;
- }
+ const bool needs_gs_update = has_new_viewport ||
+ has_new_view_index ||
+ has_new_pipeline ||
+ has_new_push_constants_gs ||
+ has_new_descriptors_gs;
- emitted_va_count++;
- }
+ if (needs_gs_update) {
+ struct v3dv_shader_variant *gs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
- if (pipeline->va_count == 0) {
- /* GFXH-930: At least one attribute must be enabled and read
- * by CS and VS. If we have no attributes being consumed by
- * the shader, set up a dummy to be loaded into the VPM.
- */
- cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
- /* Valid address of data whose value will be unused. */
- attr.address = v3dv_cl_address(job->indirect.bo, 0);
+ struct v3dv_shader_variant *gs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
- attr.type = ATTRIBUTE_FLOAT;
- attr.stride = 0;
- attr.vec_size = 1;
+ cmd_buffer->state.uniforms.gs =
+ v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant);
- attr.number_of_values_read_by_coordinate_shader = 1;
- attr.number_of_values_read_by_vertex_shader = 1;
+ cmd_buffer->state.uniforms.gs_bin =
+ v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant);
}
}
- if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- sizeof(pipeline->vcm_cache_size));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size);
- }
+ /* VK_SHADER_STAGE_VERTEX_BIT */
+ const bool has_new_descriptors_vs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT);
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(GL_SHADER_STATE));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ const bool has_new_push_constants_vs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT);
- cl_emit(&job->bcl, GL_SHADER_STATE, state) {
- state.address = v3dv_cl_address(job->indirect.bo,
- shader_rec_offset);
- state.number_of_attribute_arrays = num_elements_to_emit;
- }
+ const bool needs_vs_update = has_new_viewport ||
+ has_new_view_index ||
+ has_new_pipeline ||
+ has_new_push_constants_vs ||
+ has_new_descriptors_vs;
- cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER |
- V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
- V3DV_CMD_DIRTY_PUSH_CONSTANTS);
-}
+ if (needs_vs_update) {
+ struct v3dv_shader_variant *vs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
-static void
-emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
+ struct v3dv_shader_variant *vs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(OCCLUSION_QUERY_COUNTER));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ cmd_buffer->state.uniforms.vs =
+ v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant);
- cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
- if (cmd_buffer->state.query.active_query) {
- counter.address =
- v3dv_cl_address(cmd_buffer->state.query.active_query, 0);
- }
+ cmd_buffer->state.uniforms.vs_bin =
+ v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
}
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX;
}
/* This stores command buffer state that we might be about to stomp for
attachment_state_item_size * state->attachment_alloc_count;
if (state->meta.attachment_alloc_count < state->attachment_alloc_count) {
if (state->meta.attachment_alloc_count > 0)
- vk_free(&cmd_buffer->device->alloc, state->meta.attachments);
+ vk_free(&cmd_buffer->device->vk.alloc, state->meta.attachments);
- state->meta.attachments = vk_zalloc(&cmd_buffer->device->alloc,
+ state->meta.attachments = vk_zalloc(&cmd_buffer->device->vk.alloc,
attachment_state_total_size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!state->meta.attachments) {
memcpy(&state->meta.render_area, &state->render_area, sizeof(VkRect2D));
}
- state->meta.pipeline = v3dv_pipeline_to_handle(state->pipeline);
- if (state->meta.pipeline) {
- memcpy(&state->meta.dynamic, &state->dynamic, sizeof(state->dynamic));
- }
-
- /* We expect that meta operations are graphics-only and won't alter
- * compute state.
+ /* We expect that meta operations are graphics-only, so we only take into
+ * account the graphics pipeline and the graphics state.
*/
+ state->meta.gfx.pipeline = state->gfx.pipeline;
+ memcpy(&state->meta.dynamic, &state->dynamic, sizeof(state->dynamic));
+
struct v3dv_descriptor_state *gfx_descriptor_state =
- &state->descriptor_state[VK_PIPELINE_BIND_POINT_GRAPHICS];
+ &cmd_buffer->state.gfx.descriptor_state;
+
if (push_descriptor_state) {
if (gfx_descriptor_state->valid != 0) {
- memcpy(&state->meta.descriptor_state, gfx_descriptor_state,
- sizeof(state->descriptor_state));
+ memcpy(&state->meta.gfx.descriptor_state, gfx_descriptor_state,
+ sizeof(state->gfx.descriptor_state));
}
state->meta.has_descriptor_state = true;
} else {
state->subpass_idx = -1;
}
- if (state->meta.pipeline != VK_NULL_HANDLE) {
- struct v3dv_pipeline *pipeline =
- v3dv_pipeline_from_handle(state->meta.pipeline);
+ if (state->meta.gfx.pipeline != NULL) {
+ struct v3dv_pipeline *pipeline = state->meta.gfx.pipeline;
VkPipelineBindPoint pipeline_binding =
v3dv_pipeline_get_binding_point(pipeline);
v3dv_CmdBindPipeline(v3dv_cmd_buffer_to_handle(cmd_buffer),
pipeline_binding,
- state->meta.pipeline);
- if (pipeline_binding == VK_PIPELINE_BIND_POINT_GRAPHICS) {
- memcpy(&state->dynamic, &state->meta.dynamic, sizeof(state->dynamic));
- state->dirty |= dirty_dynamic_state;
- }
+ v3dv_pipeline_to_handle(state->meta.gfx.pipeline));
} else {
- state->pipeline = VK_NULL_HANDLE;
+ state->gfx.pipeline = NULL;
+ }
+
+ if (dirty_dynamic_state) {
+ memcpy(&state->dynamic, &state->meta.dynamic, sizeof(state->dynamic));
+ state->dirty |= dirty_dynamic_state;
}
if (state->meta.has_descriptor_state) {
- if (state->meta.descriptor_state.valid != 0) {
- memcpy(&state->descriptor_state[VK_PIPELINE_BIND_POINT_GRAPHICS],
- &state->meta.descriptor_state,
- sizeof(state->descriptor_state));
+ if (state->meta.gfx.descriptor_state.valid != 0) {
+ memcpy(&state->gfx.descriptor_state, &state->meta.gfx.descriptor_state,
+ sizeof(state->gfx.descriptor_state));
} else {
- state->descriptor_state[VK_PIPELINE_BIND_POINT_GRAPHICS].valid = 0;
+ state->gfx.descriptor_state.valid = 0;
}
}
memcpy(cmd_buffer->push_constants_data, state->meta.push_constants,
sizeof(state->meta.push_constants));
- state->meta.pipeline = VK_NULL_HANDLE;
+ state->meta.gfx.pipeline = NULL;
state->meta.framebuffer = VK_NULL_HANDLE;
state->meta.pass = VK_NULL_HANDLE;
state->meta.subpass_idx = -1;
state->meta.has_descriptor_state = false;
}
-/* FIXME: C&P from v3dx_draw. Refactor to common place? */
-static uint32_t
-v3d_hw_prim_type(enum pipe_prim_type prim_type)
-{
- switch (prim_type) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_LOOP:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- return prim_type;
-
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY);
-
- default:
- unreachable("Unsupported primitive type");
- }
-}
-
-struct v3dv_draw_info {
- uint32_t vertex_count;
- uint32_t instance_count;
- uint32_t first_vertex;
- uint32_t first_instance;
-};
-
-static void
-cmd_buffer_emit_draw(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_draw_info *info)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- struct v3dv_pipeline *pipeline = state->pipeline;
-
- assert(pipeline);
-
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->vs->topology);
-
- if (info->first_instance > 0) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
- base.base_instance = info->first_instance;
- base.base_vertex = 0;
- }
- }
-
- if (info->instance_count > 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.index_of_first_vertex = info->first_vertex;
- prim.number_of_instances = info->instance_count;
- prim.instance_length = info->vertex_count;
- }
- } else {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
- cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.length = info->vertex_count;
- prim.index_of_first_vertex = info->first_vertex;
- }
- }
-}
-
static struct v3dv_job *
cmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer)
{
- /* If we emitted a pipeline barrier right before this draw we won't have
- * an active job. In that case, create a new job continuing the current
- * subpass.
- */
struct v3dv_job *job = cmd_buffer->state.job;
- if (!job) {
- job = v3dv_cmd_buffer_subpass_resume(cmd_buffer,
- cmd_buffer->state.subpass_idx);
- return job;
- }
+ assert(job);
/* If the job has been flagged with 'always_flush' and it has already
* recorded any draw calls then we need to start a new job for it.
return job;
}
+/**
+ * The Vulkan spec states:
+ *
+ * "It is legal for a subpass to use no color or depth/stencil
+ * attachments (...) This kind of subpass can use shader side effects such
+ * as image stores and atomics to produce an output. In this case, the
+ * subpass continues to use the width, height, and layers of the framebuffer
+ * to define the dimensions of the rendering area, and the
+ * rasterizationSamples from each pipeline’s
+ * VkPipelineMultisampleStateCreateInfo to define the number of samples used
+ * in rasterization."
+ *
+ * We need to enable MSAA in the TILE_BINNING_MODE_CFG packet, which we
+ * emit when we start a new frame at the beginning of a subpass. At that point,
+ * if the framebuffer doesn't have any attachments we won't enable MSAA and
+ * the job won't be valid in the scenario described by the spec.
+ *
+ * This function is intended to be called before a draw call and will test if
+ * we are in that scenario, in which case, it will restart the current job
+ * with MSAA enabled.
+ */
static void
-cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
+cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ assert(cmd_buffer->state.job);
+
+ /* We don't support variableMultisampleRate, so all pipelines bound in the
+ * same subpass must have a matching number of samples, which means we only
+ * need to do this check on the first draw call.
+ */
+ if (cmd_buffer->state.job->draw_count > 0)
+ return;
+
+ /* We only need to restart the frame if the pipeline requires MSAA but
+ * our frame tiling didn't enable it.
+ */
+ if (!cmd_buffer->state.gfx.pipeline->msaa ||
+ cmd_buffer->state.job->frame_tiling.msaa) {
+ return;
+ }
+
+ /* FIXME: Secondary command buffers don't start frames. Instead, they are
+ * recorded into primary jobs that start them. For secondaries, we should
+ * still handle this scenario, but we should do that when we record them
+ * into primaries by testing if any of the secondaries has multisampled
+ * draw calls in them, and then using that info to decide if we need to
+ * restart the primary job into which they are being recorded.
+ */
+ if (cmd_buffer->level != VK_COMMAND_BUFFER_LEVEL_PRIMARY)
+ return;
+
+ /* Drop the current job and restart it with MSAA enabled */
+ struct v3dv_job *old_job = cmd_buffer->state.job;
+ cmd_buffer->state.job = NULL;
+
+ struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
+ sizeof(struct v3dv_job), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!job) {
+ v3dv_flag_oom(cmd_buffer, NULL);
+ return;
+ }
+
+ v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer,
+ cmd_buffer->state.subpass_idx);
+ cmd_buffer->state.job = job;
+
+ v3dv_job_start_frame(job,
+ old_job->frame_tiling.width,
+ old_job->frame_tiling.height,
+ old_job->frame_tiling.layers,
+ true,
+ old_job->frame_tiling.render_target_count,
+ old_job->frame_tiling.internal_bpp,
+ true /* msaa */);
+
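+ /* The old job cannot have recorded any draws (we checked draw_count
+ * above), so it is safe to destroy it.
+ */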
+ v3dv_job_destroy(old_job);
+}
+
+void
+v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
{
- assert(cmd_buffer->state.pipeline);
- assert(!(cmd_buffer->state.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
+ assert(cmd_buffer->state.gfx.pipeline);
+ assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
+
+ /* If we emitted a pipeline barrier right before this draw we won't have
+ * an active job. In that case, create a new job continuing the current
+ * subpass.
+ */
+ if (!cmd_buffer->state.job) {
+ v3dv_cmd_buffer_subpass_resume(cmd_buffer,
+ cmd_buffer->state.subpass_idx);
+ }
+
+ /* Restart single sample job for MSAA pipeline if needed */
+ cmd_buffer_restart_job_for_msaa_if_needed(cmd_buffer);
/* If the job is configured to flush on every draw call we need to create
* a new job now.
struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
job->draw_count++;
- /* We may need to compile shader variants based on bound textures */
- uint32_t *dirty = &cmd_buffer->state.dirty;
- if (*dirty & (V3DV_CMD_DIRTY_PIPELINE |
- V3DV_CMD_DIRTY_DESCRIPTOR_SETS)) {
- update_pipeline_variants(cmd_buffer);
- }
-
/* GL shader state binds shaders, uniform and vertex attribute state. The
* compiler injects uniforms to handle some descriptor types (such as
* textures), so we need to regen that when descriptor state changes.
* We also need to emit new shader state if we have a dirty viewport since
* that will require new uniform state for QUNIFORM_VIEWPORT_*.
*/
- if (*dirty & (V3DV_CMD_DIRTY_PIPELINE |
- V3DV_CMD_DIRTY_VERTEX_BUFFER |
- V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
- V3DV_CMD_DIRTY_PUSH_CONSTANTS |
- V3DV_CMD_DIRTY_VIEWPORT)) {
- emit_gl_shader_state(cmd_buffer);
- }
+ uint32_t *dirty = &cmd_buffer->state.dirty;
+
+ const uint32_t dirty_uniform_state =
+ *dirty & (V3DV_CMD_DIRTY_PIPELINE |
+ V3DV_CMD_DIRTY_PUSH_CONSTANTS |
+ V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
+ V3DV_CMD_DIRTY_VIEWPORT |
+ V3DV_CMD_DIRTY_VIEW_INDEX);
+
+ if (dirty_uniform_state)
+ update_gfx_uniform_state(cmd_buffer, dirty_uniform_state);
+
+ struct v3dv_device *device = cmd_buffer->device;
+
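+ /* v3dv_X dispatches to the per-hardware-version implementation of each
+ * emission helper below.
+ */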
+ if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER))
+ v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) {
- emit_configuration_bits(cmd_buffer);
- emit_varyings_state(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer);
}
if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) {
}
if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) {
- emit_viewport(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer);
}
+ if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER)
+ v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer);
+
const uint32_t dynamic_stencil_dirty_flags =
V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
V3DV_CMD_DIRTY_STENCIL_REFERENCE;
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags))
- emit_stencil(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS))
- emit_depth_bias(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS))
- emit_blend(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer);
if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY)
- emit_occlusion_query(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer);
if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH)
- emit_line_width(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer);
+
+ if (*dirty & V3DV_CMD_DIRTY_PIPELINE)
+ v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer);
+
+ if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE))
+ v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer);
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE;
}
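+/* Multiview: records the view index we are about to draw for and flags it
+ * dirty so the next pre-draw state update re-emits any uniforms that depend
+ * on the view index.
+ */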
+static inline void
+cmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t view_index)
+{
+ cmd_buffer->state.view_index = view_index;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX;
+}
+
static void
cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_draw_info *info)
{
- cmd_buffer_emit_pre_draw(cmd_buffer);
- cmd_buffer_emit_draw(cmd_buffer, info);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
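+ /* Multiview: replay the draw once for each view in the subpass mask */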
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ }
}
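/* Editor's sketch, not part of the patch: how the multiview loop above
 * walks the subpass view mask. u_bit_scan() (Mesa's util/u_math.h) pops
 * the lowest set bit and returns its index; the standalone equivalent
 * below shows a mask of 0x5 replaying the draw for views 0 and 2.
 */
#include <stdio.h>
#include <strings.h> /* ffs() */

static int
bit_scan(unsigned *mask) /* same behavior as u_bit_scan() */
{
   const int i = ffs(*mask) - 1;
   *mask &= ~(1u << i); /* consume the bit we just reported */
   return i;
}

int
main(void)
{
   unsigned view_mask = 0x5; /* bits 0 and 2: two active views */
   while (view_mask)
      printf("emit draw for view %d\n", bit_scan(&view_mask));
   return 0;
}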
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDraw(VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance)
{
+ if (vertexCount == 0 || instanceCount == 0)
+ return;
+
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
struct v3dv_draw_info info = {};
info.vertex_count = vertexCount;
cmd_buffer_draw(cmd_buffer, &info);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
int32_t vertexOffset,
uint32_t firstInstance)
{
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
-
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->vs->topology);
- uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
- uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size;
+ if (indexCount == 0 || instanceCount == 0)
+ return;
- if (vertexOffset != 0 || firstInstance != 0) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
- base.base_instance = firstInstance;
- base.base_vertex = vertexOffset;
- }
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
+ return;
}
- if (instanceCount == 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEXED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.length = indexCount;
- prim.index_offset = index_offset;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- }
- } else if (instanceCount > 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.index_offset = index_offset;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- prim.number_of_instances = instanceCount;
- prim.instance_length = indexCount;
- }
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
-
/* drawCount is the number of draws to execute, and can be zero. */
if (drawCount == 0)
return;
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->vs->topology);
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
- cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.number_of_draw_indirect_array_records = drawCount;
- prim.stride_in_multiples_of_4_bytes = stride >> 2;
- prim.address = v3dv_cl_address(buffer->mem->bo,
- buffer->mem_offset + offset);
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
-
/* drawCount is the number of draws to execute, and can be zero. */
if (drawCount == 0)
return;
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->vs->topology);
- uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
- cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- prim.number_of_draw_indirect_indexed_records = drawCount;
- prim.stride_in_multiples_of_4_bytes = stride >> 2;
- prim.address = v3dv_cl_address(buffer->mem->bo,
- buffer->mem_offset + offset);
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
get_index_size(VkIndexType index_type)
{
switch (index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return 1;
+ break;
case VK_INDEX_TYPE_UINT16:
return 2;
break;
}
}
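/* Editor's note: in the emission paths this patch removes (and in the
 * versioned backends they move to), the hardware index-type field is the
 * log2 of the index size in bytes, computed as ffs(size) - 1, so the
 * 1/2/4-byte sizes returned above map to 0/1/2. A minimal sketch:
 */
#include <assert.h>
#include <strings.h> /* ffs() */

static unsigned
hw_index_type(unsigned index_size_bytes)
{
   assert(index_size_bytes == 1 || index_size_bytes == 2 ||
          index_size_bytes == 4);
   return ffs(index_size_bytes) - 1; /* 1 -> 0, 2 -> 1, 4 -> 2 */
}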
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
VkIndexType indexType)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, ibuffer, buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP));
- v3dv_return_if_oom(cmd_buffer, NULL);
const uint32_t index_size = get_index_size(indexType);
-
- /* If we have started a new job we always need to emit index buffer state.
- * We know we are in that scenario because that is the only case where we
- * set the dirty bit.
- */
- if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_INDEX_BUFFER)) {
- if (buffer == cmd_buffer->state.index_buffer.buffer &&
- offset == cmd_buffer->state.index_buffer.offset &&
- index_size == cmd_buffer->state.index_buffer.index_size) {
- return;
- }
- }
-
- cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
- ib.address = v3dv_cl_address(ibuffer->mem->bo,
- ibuffer->mem_offset + offset);
- ib.size = ibuffer->mem->bo->size;
+ if (buffer == cmd_buffer->state.index_buffer.buffer &&
+ offset == cmd_buffer->state.index_buffer.offset &&
+ index_size == cmd_buffer->state.index_buffer.index_size) {
+ return;
}
cmd_buffer->state.index_buffer.buffer = buffer;
cmd_buffer->state.index_buffer.offset = offset;
cmd_buffer->state.index_buffer.index_size = index_size;
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t compareMask)
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t writeMask)
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t reference)
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer,
float depthBiasConstantFactor,
float depthBiasClamp,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer->state.dynamic.depth_bias.constant_factor = depthBiasConstantFactor;
+ cmd_buffer->state.dynamic.depth_bias.depth_bias_clamp = depthBiasClamp;
cmd_buffer->state.dynamic.depth_bias.slope_factor = depthBiasSlopeFactor;
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
float minDepthBounds,
float maxDepthBounds)
*/
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer,
float lineWidth)
{
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
assert(firstSet + descriptorSetCount <= MAX_SETS);
struct v3dv_descriptor_state *descriptor_state =
- &cmd_buffer->state.descriptor_state[pipelineBindPoint];
+ pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE ?
+ &cmd_buffer->state.compute.descriptor_state :
+ &cmd_buffer->state.gfx.descriptor_state;
+ VkShaderStageFlags dirty_stages = 0;
bool descriptor_state_changed = false;
for (uint32_t i = 0; i < descriptorSetCount; i++) {
V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]);
uint32_t index = firstSet + i;
+ descriptor_state->valid |= (1u << index);
if (descriptor_state->descriptor_sets[index] != set) {
descriptor_state->descriptor_sets[index] = set;
- descriptor_state_changed = true;
- }
-
- if (!(descriptor_state->valid & (1u << index))) {
- descriptor_state->valid |= (1u << index);
+ dirty_stages |= set->layout->shader_stages;
descriptor_state_changed = true;
}
if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) {
descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index];
+ dirty_stages |= set->layout->shader_stages;
descriptor_state_changed = true;
}
}
}
if (descriptor_state_changed) {
- if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
- else
+ cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
+ } else {
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
+ cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ }
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdPushConstants(VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
VkShaderStageFlags stageFlags,
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- if (!memcmp(cmd_buffer->push_constants_data + offset, pValues, size))
+ if (!memcmp((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size))
return;
- memcpy((void*) cmd_buffer->push_constants_data + offset, pValues, size);
+ memcpy((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size);
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+ cmd_buffer->state.dirty_push_constants_stages |= stageFlags;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
const float blendConstants[4])
{
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS;
}
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkBool32 *pColorWriteEnables)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ uint32_t color_write_enable = 0;
+
+ for (uint32_t i = 0; i < attachmentCount; i++)
+ color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
+
+ if (state->dynamic.color_write_enable == color_write_enable)
+ return;
+
+ state->dynamic.color_write_enable = color_write_enable;
+
+ state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
+}
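/* Editor's sketch, not part of the patch: the loop above packs one 4-bit
 * RGBA write-enable nibble per color attachment, so attachment i owns bits
 * [4*i, 4*i+3]. Standalone equivalent with a worked value:
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint32_t
pack_color_write_enable(const bool *enables, uint32_t count)
{
   uint32_t mask = 0;
   for (uint32_t i = 0; i < count; i++)
      mask |= enables[i] ? (0xfu << (i * 4)) : 0;
   return mask;
}

static void
pack_color_write_enable_example(void)
{
   const bool enables[] = { true, false, true };
   assert(pack_color_write_enable(enables, 3) == 0xf0f);
}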
+
void
v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-static void
-ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t slot_size,
- uint32_t used_count,
- uint32_t *alloc_count,
- void **ptr)
+void
+v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t slot_size,
+ uint32_t used_count,
+ uint32_t *alloc_count,
+ void **ptr)
{
if (used_count >= *alloc_count) {
const uint32_t prev_slot_count = *alloc_count;
const uint32_t new_slot_count = MAX2(*alloc_count * 2, 4);
const uint32_t bytes = new_slot_count * slot_size;
- *ptr = vk_alloc(&cmd_buffer->device->alloc, bytes, 8,
+ *ptr = vk_alloc(&cmd_buffer->device->vk.alloc, bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (*ptr == NULL) {
fprintf(stderr, "Error: failed to allocate CPU buffer for query.\n");
VkQueryControlFlags flags)
{
/* FIXME: we only support one active query for now */
- assert(cmd_buffer->state.query.active_query == NULL);
+ assert(cmd_buffer->state.query.active_query.bo == NULL);
assert(query < pool->query_count);
- cmd_buffer->state.query.active_query = pool->queries[query].bo;
+ cmd_buffer->state.query.active_query.bo = pool->queries[query].bo;
+ cmd_buffer->state.query.active_query.offset = pool->queries[query].offset;
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
uint32_t query)
{
assert(query < pool->query_count);
- assert(cmd_buffer->state.query.active_query != NULL);
+ assert(cmd_buffer->state.query.active_query.bo != NULL);
if (cmd_buffer->state.pass) {
/* Queue the EndQuery in the command buffer state, we will create a CPU
* render pass job in which they have been recorded.
*/
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- ensure_array_state(cmd_buffer,
- sizeof(struct v3dv_end_query_cpu_job_info),
- state->query.end.used_count,
- &state->query.end.alloc_count,
- (void **) &state->query.end.states);
+ v3dv_cmd_buffer_ensure_array_state(cmd_buffer,
+ sizeof(struct v3dv_end_query_cpu_job_info),
+ state->query.end.used_count,
+ &state->query.end.alloc_count,
+ (void **) &state->query.end.states);
v3dv_return_if_oom(cmd_buffer, NULL);
struct v3dv_end_query_cpu_job_info *info =
info->pool = pool;
info->query = query;
+
+ /* From the Vulkan spec:
+ *
+ * "If queries are used while executing a render pass instance that has
+ * multiview enabled, the query uses N consecutive query indices in
+ * the query pool (starting at query) where N is the number of bits set
+ * in the view mask in the subpass the query is used in. How the
+ * numerical results of the query are distributed among the queries is
+ * implementation-dependent."
+ *
+ * In our case, only the first query is used, but this means we still need
+ * to flag the other queries as available so we don't emit errors when
+ * applications attempt to retrieve values from them.
+ */
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (!pass->multiview_enabled) {
+ info->count = 1;
+ } else {
+ struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
+ info->count = util_bitcount(subpass->view_mask);
+ }
} else {
/* Otherwise, schedule the CPU job immediately */
struct v3dv_job *job =
job->cpu.query_end.pool = pool;
job->cpu.query_end.query = query;
+
+ /* Multiview queries cannot cross subpass boundaries */
+ job->cpu.query_end.count = 1;
+
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
- cmd_buffer->state.query.active_query = NULL;
+ cmd_buffer->state.query.active_query.bo = NULL;
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
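/* Editor's note: the spec language quoted above means a query used in a
 * multiview subpass consumes N consecutive pool entries, with N the number
 * of bits set in the view mask, which is exactly what util_bitcount()
 * computes. For view_mask = 0xb (views 0, 1 and 3), N == 3, so queries
 * [query, query + 2] must all be flagged available even though only the
 * first one receives a result here.
 */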
struct drm_v3d_submit_tfu *tfu)
{
struct v3dv_device *device = cmd_buffer->device;
- struct v3dv_job *job = vk_zalloc(&device->alloc,
+ struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job) {
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetEvent(VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResetEvent(VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent *pEvents,
const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
job->cpu.event_wait.events =
- vk_alloc(&cmd_buffer->device->alloc, event_list_size, 8,
+ vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job->cpu.event_wait.events) {
v3dv_flag_oom(cmd_buffer, NULL);
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits pipelineStage,
VkQueryPool queryPool,
uint32_t query)
{
- unreachable("Timestamp queries are not supported.");
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
+
+ /* If this is called inside a render pass we need to finish the current
+ * job here...
+ */
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (pass)
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
+ V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
+ cmd_buffer, -1);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ job->cpu.query_timestamp.pool = query_pool;
+ job->cpu.query_timestamp.query = query;
+
+ if (!pass || !pass->multiview_enabled) {
+ job->cpu.query_timestamp.count = 1;
+ } else {
+ struct v3dv_subpass *subpass =
+ &pass->subpasses[cmd_buffer->state.subpass_idx];
+ job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
+ }
+
+ list_addtail(&job->list_link, &cmd_buffer->jobs);
+ cmd_buffer->state.job = NULL;
+
+ /* ...and resume the subpass after the timestamp */
+ if (cmd_buffer->state.pass)
+ v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
}
static void
cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer)
{
- assert(cmd_buffer->state.pipeline);
- assert(cmd_buffer->state.pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
-
- /* We may need to compile shader variants based on bound textures */
- uint32_t *dirty = &cmd_buffer->state.dirty;
- if (*dirty & (V3DV_CMD_DIRTY_PIPELINE |
- V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS)) {
- update_pipeline_variants(cmd_buffer);
- }
+ assert(cmd_buffer->state.compute.pipeline);
+ assert(cmd_buffer->state.compute.pipeline->active_stages ==
+ VK_SHADER_STAGE_COMPUTE_BIT);
- *dirty &= ~(V3DV_CMD_DIRTY_PIPELINE |
- V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
+ cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE |
+ V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
+ cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
/* Make sure the GPU is not currently accessing the indirect CL for this
* job, since we are about to overwrite some of the uniform data.
*/
- const uint64_t infinite = 0xffffffffffffffffull;
- v3dv_bo_wait(job->device, job->indirect.bo, infinite);
+ v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE);
for (uint32_t i = 0; i < 3; i++) {
if (info->wg_uniform_offsets[i]) {
static struct v3dv_job *
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t base_offset_x,
+ uint32_t base_offset_y,
+ uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z,
uint32_t **wg_uniform_offsets_out,
uint32_t *wg_size_out)
{
- struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
- assert(pipeline && pipeline->cs && pipeline->cs->nir);
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
+ assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
+ struct v3dv_shader_variant *cs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE];
- struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->alloc,
+ struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job) {
job->csd.wg_count[1] = group_count_y;
job->csd.wg_count[2] = group_count_z;
+ job->csd.wg_base[0] = base_offset_x;
+ job->csd.wg_base[1] = base_offset_y;
+ job->csd.wg_base[2] = base_offset_z;
+
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
- const struct nir_shader *cs = pipeline->cs->nir;
-
- const uint32_t wgs_per_sg = 1; /* FIXME */
- const uint32_t wg_size = cs->info.cs.local_size[0] *
- cs->info.cs.local_size[1] *
- cs->info.cs.local_size[2];
- submit->cfg[3] |= wgs_per_sg << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
- submit->cfg[3] |= ((DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1) <<
- V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT);
+ const struct v3d_compute_prog_data *cpd =
+ cs_variant->prog_data.cs;
+
+ const uint32_t num_wgs = group_count_x * group_count_y * group_count_z;
+ const uint32_t wg_size = cpd->local_size[0] *
+ cpd->local_size[1] *
+ cpd->local_size[2];
+
+ uint32_t wgs_per_sg =
+ v3d_csd_choose_workgroups_per_supergroup(
+ &cmd_buffer->device->devinfo,
+ cs_variant->prog_data.cs->has_subgroups,
+ cs_variant->prog_data.cs->base.has_control_barrier,
+ cs_variant->prog_data.cs->base.threads,
+ num_wgs, wg_size);
+
+ uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16);
+ uint32_t whole_sgs = num_wgs / wgs_per_sg;
+ uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg;
+ uint32_t num_batches = batches_per_sg * whole_sgs +
+ DIV_ROUND_UP(rem_wgs * wg_size, 16);
+
+ submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
+ submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT;
submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT;
if (wg_size_out)
*wg_size_out = wg_size;
- uint32_t batches_per_wg = DIV_ROUND_UP(wg_size, 16);
- submit->cfg[4] = batches_per_wg *
- (group_count_x * group_count_y * group_count_z) - 1;
+ submit->cfg[4] = num_batches - 1;
assert(submit->cfg[4] != ~0);
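   /* Editor's worked example (assumed numbers, not from the patch): with a
    * local size of 8x8x1 (wg_size = 64), num_wgs = 10 and wgs_per_sg = 3:
    *
    *    batches_per_sg = DIV_ROUND_UP(3 * 64, 16) = 12
    *    whole_sgs      = 10 / 3 = 3      -> 3 * 12 = 36 batches
    *    rem_wgs        = 10 - 3 * 3 = 1  -> DIV_ROUND_UP(64, 16) = 4 batches
    *    num_batches    = 36 + 4 = 40     -> cfg[4] = 39
    */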
- assert(pipeline->cs->current_variant &&
- pipeline->cs->current_variant->assembly_bo);
- const struct v3dv_shader_variant *variant = pipeline->cs->current_variant;
- submit->cfg[5] = variant->assembly_bo->offset;
+ assert(pipeline->shared_data->assembly_bo);
+ struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo;
+
+ submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset;
submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
- if (variant->prog_data.base->single_seg)
+ if (cs_variant->prog_data.base->single_seg)
submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
- if (variant->prog_data.base->threads == 4)
+ if (cs_variant->prog_data.base->threads == 4)
submit->cfg[5] |= V3D_CSD_CFG5_THREADING;
- if (variant->prog_data.cs->shared_size > 0) {
+ if (cs_variant->prog_data.cs->shared_size > 0) {
job->csd.shared_memory =
v3dv_bo_alloc(cmd_buffer->device,
- variant->prog_data.cs->shared_size * wgs_per_sg,
+ cs_variant->prog_data.cs->shared_size * wgs_per_sg,
"shared_vars", true);
if (!job->csd.shared_memory) {
v3dv_flag_oom(cmd_buffer, NULL);
}
}
- v3dv_job_add_bo(job, variant->assembly_bo);
-
+ v3dv_job_add_bo_unchecked(job, cs_assembly_bo);
struct v3dv_cl_reloc uniforms =
- v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline->cs,
+ v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline,
+ cs_variant,
wg_uniform_offsets_out);
submit->cfg[6] = uniforms.bo->offset + uniforms.offset;
static void
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t base_offset_x,
+ uint32_t base_offset_y,
+ uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z)
struct v3dv_job *job =
cmd_buffer_create_csd_job(cmd_buffer,
+ base_offset_x,
+ base_offset_y,
+ base_offset_z,
group_count_x,
group_count_y,
group_count_z,
cmd_buffer->state.job = NULL;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
uint32_t groupCountX,
uint32_t groupCountY,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_emit_pre_dispatch(cmd_buffer);
- cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
+ cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
+ groupCountX, groupCountY, groupCountZ);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
+ uint32_t baseGroupX,
+ uint32_t baseGroupY,
+ uint32_t baseGroupZ,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer_emit_pre_dispatch(cmd_buffer);
+ cmd_buffer_dispatch(cmd_buffer,
+ baseGroupX, baseGroupY, baseGroupZ,
+ groupCountX, groupCountY, groupCountZ);
}
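/* Editor's note: per the Vulkan spec, vkCmdDispatch() is equivalent to
 * vkCmdDispatchBase() with a (0, 0, 0) base, which is exactly how it is
 * forwarded above; the base offsets shift the workgroup IDs seen by the
 * shader without changing how many workgroups run. Equivalent calls:
 *
 *    vkCmdDispatch(cmd, 4, 4, 1);
 *    vkCmdDispatchBase(cmd, 0, 0, 0, 4, 4, 1);
 */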
+
static void
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
*/
struct v3dv_job *csd_job =
cmd_buffer_create_csd_job(cmd_buffer,
+ 0, 0, 0,
1, 1, 1,
&job->cpu.csd_indirect.wg_uniform_offsets[0],
&job->cpu.csd_indirect.wg_size);
cmd_buffer->state.job = NULL;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset)
cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset);
}
-void
-v3dv_CmdResolveImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageResolve *pRegions)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
- unreachable("vkCmdResolveImage not implemented");
+ /* Nothing to do here since we only support a single device */
+ assert(deviceMask == 0x1);
}