#include "v3dv_private.h"
#include "util/u_pack_color.h"
-#include "vk_format_info.h"
#include "vk_util.h"
-const struct v3dv_dynamic_state default_dynamic_state = {
- .viewport = {
- .count = 0,
- },
- .scissor = {
- .count = 0,
- },
- .stencil_compare_mask =
- {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_write_mask =
- {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_reference =
- {
- .front = 0u,
- .back = 0u,
- },
- .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
- .depth_bias = {
- .constant_factor = 0.0f,
- .depth_bias_clamp = 0.0f,
- .slope_factor = 0.0f,
- },
- .line_width = 1.0f,
-};
-
void
v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
{
pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
VK_OBJECT_TYPE_COMMAND_POOL);
if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (pAllocator)
pool->alloc = *pAllocator;
* buffer reset that would reset the loader's dispatch table for the
- * command buffer, and any other relevant info from vk_object_base
+ * command buffer, and any other relevant info from vk_command_buffer
*/
- const uint32_t base_size = sizeof(struct vk_object_base);
+ const uint32_t base_size = sizeof(struct vk_command_buffer);
uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size;
memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size);
VkCommandBuffer *pCommandBuffer)
{
struct v3dv_cmd_buffer *cmd_buffer;
- cmd_buffer = vk_object_zalloc(&device->vk,
- &pool->alloc,
- sizeof(*cmd_buffer),
- VK_OBJECT_TYPE_COMMAND_BUFFER);
+ cmd_buffer = vk_zalloc2(&device->vk.alloc,
+ &pool->alloc,
+ sizeof(*cmd_buffer),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
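+ /* Initialize the shared vk_command_buffer base; on failure, free the raw
+ * allocation and propagate the error.
+ */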
+ VkResult result;
+ result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
+ return result;
+ }
cmd_buffer_init(cmd_buffer, device, pool, level);
{
list_del(&cmd_buffer->pool_link);
cmd_buffer_free_resources(cmd_buffer);
- vk_object_free(&cmd_buffer->device->vk, &cmd_buffer->pool->alloc, cmd_buffer);
+ vk_command_buffer_finish(&cmd_buffer->vk);
+ vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc,
+ cmd_buffer);
}
static bool
struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx];
struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx];
+ /* Don't merge if the subpasses have different view masks, since in that
+ * case the framebuffer setup is different and we need to emit different
+ * RCLs.
+ */
+ if (subpass->view_mask != prev_subpass->view_mask)
+ return false;
+
/* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
* we need to check that for each subpass all its used attachments are
* used by the other subpass.
uint8_t max_internal_bpp,
bool msaa)
{
- static const uint8_t tile_sizes[] = {
- 64, 64,
- 64, 32,
- 32, 32,
- 32, 16,
- 16, 16,
- 16, 8,
- 8, 8
- };
-
assert(job);
struct v3dv_frame_tiling *tiling = &job->frame_tiling;
tiling->layers = layers;
tiling->render_target_count = render_target_count;
tiling->msaa = msaa;
+ tiling->internal_bpp = max_internal_bpp;
- uint32_t tile_size_index = 0;
-
- if (render_target_count > 2)
- tile_size_index += 2;
- else if (render_target_count > 1)
- tile_size_index += 1;
-
- if (msaa)
- tile_size_index += 2;
+ /* We can use double-buffer when MSAA is disabled to reduce tile store
+ * overhead.
+ *
+ * FIXME: if we are emitting any tile loads the hardware will serialize
+ * loads and stores across tiles, effectively disabling double buffering,
+ * so we would want to check for that and not enable it in that case to
+ * avoid reducing the tile size.
+ */
+ tiling->double_buffer =
+ unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
- tiling->internal_bpp = max_internal_bpp;
- tile_size_index += tiling->internal_bpp;
- assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2);
+ assert(!tiling->msaa || !tiling->double_buffer);
- tiling->tile_width = tile_sizes[tile_size_index * 2];
- tiling->tile_height = tile_sizes[tile_size_index * 2 + 1];
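+ /* Use the shared v3d helper to pick tile dimensions from the render
+ * target count, internal bpp, MSAA and double-buffer settings, replacing
+ * the local tile_sizes table above.
+ */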
+ v3d_choose_tile_size(render_target_count, max_internal_bpp,
+ tiling->msaa, tiling->double_buffer,
+ &tiling->tile_width, &tiling->tile_height);
tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);
uint32_t width,
uint32_t height,
uint32_t layers,
+ bool allocate_tile_state_for_all_layers,
uint32_t render_target_count,
uint8_t max_internal_bpp,
bool msaa)
v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
v3dv_return_if_oom(NULL, job);
+ /* We only need to allocate tile state for all layers if the binner
+ * writes primitives to layers other than the first. This can only be
+ * done using layered rendering (writing gl_Layer from a geometry shader),
+ * so for other cases of multilayered framebuffers (typically with
+ * meta copy/clear operations) that won't use layered rendering, we only
+ * need one layer's worth of tile state for the binner.
+ */
+ if (!allocate_tile_state_for_all_layers)
+ layers = 1;
+
/* The PTB will request the tile alloc initial size per tile at start
* of tile binning.
*/
v3dv_cl_init(job, &job->indirect);
- if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH))
job->always_flush = true;
}
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
VkCommandBufferResetFlags flags)
{
+ vk_command_buffer_reset(&cmd_buffer->vk);
if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
struct v3dv_device *device = cmd_buffer->device;
struct v3dv_cmd_pool *pool = cmd_buffer->pool;
.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
.srcSubresource = {
VK_IMAGE_ASPECT_COLOR_BIT,
- src_iview->base_level,
- src_iview->first_layer,
- src_iview->last_layer - src_iview->first_layer + 1,
+ src_iview->vk.base_mip_level,
+ src_iview->vk.base_array_layer,
+ src_iview->vk.layer_count,
},
.srcOffset = { 0, 0, 0 },
.dstSubresource = {
VK_IMAGE_ASPECT_COLOR_BIT,
- dst_iview->base_level,
- dst_iview->first_layer,
- dst_iview->last_layer - dst_iview->first_layer + 1,
+ dst_iview->vk.base_mip_level,
+ dst_iview->vk.base_array_layer,
+ dst_iview->vk.layer_count,
},
.dstOffset = { 0, 0, 0 },
- .extent = src_iview->image->extent,
+ .extent = src_iview->vk.image->extent,
};
+ struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image;
+ struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image;
VkResolveImageInfo2KHR resolve_info = {
.sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
- .srcImage = v3dv_image_to_handle(src_iview->image),
+ .srcImage = v3dv_image_to_handle(src_image),
.srcImageLayout = VK_IMAGE_LAYOUT_GENERAL,
- .dstImage = v3dv_image_to_handle(dst_iview->image),
+ .dstImage = v3dv_image_to_handle(dst_image),
.dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
.regionCount = 1,
.pRegions = &region,
}
VKAPI_ATTR void VKAPI_CALL
-v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- VkSubpassContents contents)
+v3dv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass);
}
VKAPI_ATTR void VKAPI_CALL
-v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
+v3dv_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+ const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
(framebuffer, subpass, &internal_bpp, &msaa);
+ /* From the Vulkan spec:
+ *
+ * "If the render pass uses multiview, then layers must be one and
+ * each attachment requires a number of layers that is greater than
+ * the maximum bit index set in the view mask in the subpasses in
+ * which it is used."
+ *
+ * So when multiview is enabled, we take the number of layers from the
+ * last bit set in the view mask.
+ */
+ uint32_t layers = framebuffer->layers;
+ if (subpass->view_mask != 0) {
+ assert(framebuffer->layers == 1);
+ layers = util_last_bit(subpass->view_mask);
+ }
+
v3dv_job_start_frame(job,
framebuffer->width,
framebuffer->height,
- framebuffer->layers,
+ layers,
+ true,
subpass->color_count,
internal_bpp,
msaa);
}
VKAPI_ATTR void VKAPI_CALL
-v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
+v3dv_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
list_for_each_entry(struct v3dv_job, secondary_job,
&secondary->jobs, list_link) {
/* These can only happen inside a render pass */
- assert(secondary_job->type != V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS);
assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY);
struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
if (!job)
}
}
+ if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
+ if (dest->color_write_enable != src->color_write_enable) {
+ dest->color_write_enable = src->color_write_enable;
+ dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
+ }
+ }
+
cmd_buffer->state.dynamic.mask = dynamic_mask;
cmd_buffer->state.dirty |= dirty;
}
const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT;
const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS;
const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
+ const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX;
/* VK_SHADER_STAGE_FRAGMENT_BIT */
const bool has_new_descriptors_fs =
(cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
const bool needs_fs_update = has_new_pipeline ||
+ has_new_view_index ||
has_new_push_constants_fs ||
has_new_descriptors_fs;
if (needs_fs_update) {
struct v3dv_shader_variant *fs_variant =
VK_SHADER_STAGE_GEOMETRY_BIT);
const bool needs_gs_update = has_new_viewport ||
+ has_new_view_index ||
has_new_pipeline ||
has_new_push_constants_gs ||
has_new_descriptors_gs;
(cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT);
const bool needs_vs_update = has_new_viewport ||
+ has_new_view_index ||
has_new_pipeline ||
has_new_push_constants_vs ||
has_new_descriptors_vs;
cmd_buffer->state.uniforms.vs_bin =
v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
}
+
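+ /* The view index has now been baked into the uniform streams emitted
+ * above, so clear its dirty bit.
+ */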
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX;
}
/* This stores command buffer state that we might be about to stomp for
old_job->frame_tiling.width,
old_job->frame_tiling.height,
old_job->frame_tiling.layers,
+ true,
old_job->frame_tiling.render_target_count,
old_job->frame_tiling.internal_bpp,
true /* msaa */);
*dirty & (V3DV_CMD_DIRTY_PIPELINE |
V3DV_CMD_DIRTY_PUSH_CONSTANTS |
V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
- V3DV_CMD_DIRTY_VIEWPORT);
+ V3DV_CMD_DIRTY_VIEWPORT |
+ V3DV_CMD_DIRTY_VIEW_INDEX);
if (dirty_uniform_state)
update_gfx_uniform_state(cmd_buffer, dirty_uniform_state);
if (*dirty & V3DV_CMD_DIRTY_PIPELINE)
v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer);
+ if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE))
+ v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer);
+
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE;
}
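+/* Record the view index for the next draw and flag it dirty so that the
+ * uniform streams referencing it are re-emitted before the draw.
+ */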
+static inline void
+cmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t view_index)
+{
+ cmd_buffer->state.view_index = view_index;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX;
+}
+
static void
cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_draw_info *info)
{
- v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
- v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
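+ /* Emit the draw once per view: set the view index for each bit set in
+ * the subpass view mask and re-emit the draw state.
+ */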
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ }
}
VKAPI_ATTR void VKAPI_CALL
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
- (cmd_buffer, indexCount, instanceCount,
- firstIndex, vertexOffset, firstInstance);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
+ }
}
VKAPI_ATTR void VKAPI_CALL
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
- (cmd_buffer, buffer, offset, drawCount, stride);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ }
}
VKAPI_ATTR void VKAPI_CALL
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
- (cmd_buffer, buffer, offset, drawCount, stride);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ }
}
VKAPI_ATTR void VKAPI_CALL
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS;
}
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkBool32 *pColorWriteEnables)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ uint32_t color_write_enable = 0;
+
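+ /* Pack a 4-bit per-channel write mask for each attachment: enabled
+ * attachments keep all RGBA channels, disabled ones contribute zero.
+ */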
+ for (uint32_t i = 0; i < attachmentCount; i++)
+ color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
+
+ if (state->dynamic.color_write_enable == color_write_enable)
+ return;
+
+ state->dynamic.color_write_enable = color_write_enable;
+
+ state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
+}
+
void
v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
info->pool = pool;
info->query = query;
+
+ /* From the Vulkan spec:
+ *
+ * "If queries are used while executing a render pass instance that has
+ * multiview enabled, the query uses N consecutive query indices in
+ * the query pool (starting at query) where N is the number of bits set
+ * in the view mask in the subpass the query is used in. How the
+ * numerical results of the query are distributed among the queries is
+ * implementation-dependent."
+ *
+ * In our case, only the first query is used, but this means we still need
+ * to flag the other queries as available so we don't emit errors when
+ * the application attempts to retrieve values from them.
+ */
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (!pass->multiview_enabled) {
+ info->count = 1;
+ } else {
+ struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
+ info->count = util_bitcount(subpass->view_mask);
+ }
} else {
/* Otherwise, schedule the CPU job immediately */
struct v3dv_job *job =
job->cpu.query_end.pool = pool;
job->cpu.query_end.query = query;
+
+ /* Multiview queries cannot cross subpass boundaries */
+ job->cpu.query_end.count = 1;
+
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
/* If this is called inside a render pass we need to finish the current
* job here...
*/
- if (cmd_buffer->state.pass)
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (pass)
v3dv_cmd_buffer_finish_job(cmd_buffer);
struct v3dv_job *job =
job->cpu.query_timestamp.pool = query_pool;
job->cpu.query_timestamp.query = query;
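+ /* Like occlusion queries, timestamps recorded while multiview is enabled
+ * are flagged with one query slot per view in the subpass view mask.
+ */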
+ if (!pass || !pass->multiview_enabled) {
+ job->cpu.query_timestamp.count = 1;
+ } else {
+ struct v3dv_subpass *subpass =
+ &pass->subpasses[cmd_buffer->state.subpass_idx];
+ job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
+ }
+
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer->state.job = NULL;