From 18319a236c18bfc4f91cb0193172fcf784ee81cd Mon Sep 17 00:00:00 2001 From: Ella Stanforth Date: Thu, 28 Jul 2022 08:15:43 +0000 Subject: [PATCH] v3dv: add support for multi-planar formats, enable YCbCr MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Original patches written by Ella Stanforth. Alejandro Piñeiro main changes (skipping the small fixes/typos): * Reduced the list of supported formats to VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM and VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, which are the only two mandated by the spec. * Fix format features exposed with YCbCr: * Disallow some features not supported with YCbCr (like blitting) * Disallow storage image support. Not clear if really useful. Even if there are CTS tests, there is an ongoing discussion about the possibility to remove them. * Expose VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT, that is mandatory for the formats supported. * Not expose VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT. Some CTS tests are failing right now, and it is not mandatory. Likely to be revisited later. * We are keeping VK_FORMAT_FEATURE_2_DISJOINT_BIT and VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT. Even if they are optional, it is working with the two formats that we are exposing. Likely that will need to be refined if we start to expose more formats. * create_image_view: don't use hardcoded 0x70, but instead do an explicit bitwise OR of VK_IMAGE_ASPECT_PLANE_0/1/2_BIT * image_format_plane_features: keep how supported aspects and separate stencil check is done. Even if the change introduced was correct (not sure about that though), that change is unrelated to this work * write_image_descriptor: add additional checks for descriptor type, to properly compute the offset. * Cosmetic changes (don't use // for comments, capital letters, etc) * Main changes coming from the review: * Not use image aliases. 
All the info is already on the image planes, and some points of the code were confusing as it was using always a hardcoded plane 0. * Squashed the two original main patches. YCbCr conversion was leaking on the multi-planar support, as some support needed info coming from the ycbcr structs. * Not expose the extension on Android, and explicitly assert that we expect plane_count to be 1 always. * For a full list of review changes see MR#19950 Signed-off-by: Ella Stanforth Signed-off-by: Alejandro Piñeiro Part-of: --- docs/features.txt | 2 +- src/broadcom/vulkan/v3dv_android.c | 3 +- src/broadcom/vulkan/v3dv_cmd_buffer.c | 5 +- src/broadcom/vulkan/v3dv_descriptor_set.c | 189 ++++++++----- src/broadcom/vulkan/v3dv_device.c | 121 ++++++++- src/broadcom/vulkan/v3dv_formats.c | 144 ++++++++-- src/broadcom/vulkan/v3dv_image.c | 215 +++++++++++---- src/broadcom/vulkan/v3dv_meta_clear.c | 8 +- src/broadcom/vulkan/v3dv_meta_copy.c | 412 ++++++++++++++++++++--------- src/broadcom/vulkan/v3dv_pass.c | 4 +- src/broadcom/vulkan/v3dv_pipeline.c | 74 +++++- src/broadcom/vulkan/v3dv_private.h | 158 ++++++++--- src/broadcom/vulkan/v3dv_queue.c | 9 +- src/broadcom/vulkan/v3dvx_cmd_buffer.c | 85 ++++-- src/broadcom/vulkan/v3dvx_descriptor_set.c | 10 +- src/broadcom/vulkan/v3dvx_device.c | 23 +- src/broadcom/vulkan/v3dvx_formats.c | 84 ++++-- src/broadcom/vulkan/v3dvx_image.c | 153 +++++------ src/broadcom/vulkan/v3dvx_meta_common.c | 71 +++-- src/broadcom/vulkan/v3dvx_pipeline.c | 8 +- src/broadcom/vulkan/v3dvx_private.h | 6 +- 21 files changed, 1306 insertions(+), 478 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index e5efeb0..4b33238 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -439,7 +439,7 @@ Vulkan 1.1 -- all DONE: anv, lvp, radv, tu, vn VK_KHR_maintenance3 DONE (anv, lvp, radv, tu, v3dv, vn) VK_KHR_multiview DONE (anv, lvp, radv, tu, v3dv, vn) VK_KHR_relaxed_block_layout DONE (anv, lvp, radv, tu, v3dv, vn) - VK_KHR_sampler_ycbcr_conversion DONE 
(anv, radv, tu, vn) + VK_KHR_sampler_ycbcr_conversion DONE (anv, radv, tu, v3dv, vn) VK_KHR_shader_draw_parameters DONE (anv, dzn, lvp, radv, tu, vn) VK_KHR_storage_buffer_storage_class DONE (anv, lvp, panvk, radv, tu, v3dv, vn) VK_KHR_variable_pointers DONE (anv, lvp, panvk, radv, tu, v3dv, vn) diff --git a/src/broadcom/vulkan/v3dv_android.c b/src/broadcom/vulkan/v3dv_android.c index af78ee3..cbf1dba 100644 --- a/src/broadcom/vulkan/v3dv_android.c +++ b/src/broadcom/vulkan/v3dv_android.c @@ -256,12 +256,13 @@ v3dv_import_native_buffer_fd(VkDevice device_h, .fd = os_dupfd_cloexec(native_buffer_fd), }; + assert(image->plane_count == 1); result = v3dv_AllocateMemory(device_h, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = &import_info, - .allocationSize = image->size, + .allocationSize = image->planes[0].size, .memoryTypeIndex = 0, }, alloc, &memory_h); diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 2edf161..5092927 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -1157,16 +1157,17 @@ cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer, const VkClearColorValue *color) { assert(attachment_idx < cmd_buffer->state.pass->attachment_count); - const struct v3dv_render_pass_attachment *attachment = &cmd_buffer->state.pass->attachments[attachment_idx]; uint32_t internal_type, internal_bpp; const struct v3dv_format *format = v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format); + /* We don't allow multi-planar formats for render pass attachments */ + assert(format->plane_count == 1); v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format) - (format->rt_type, &internal_type, &internal_bpp); + (format->planes[0].rt_type, &internal_type, &internal_bpp); uint32_t internal_size = 4 << internal_bpp; diff --git a/src/broadcom/vulkan/v3dv_descriptor_set.c b/src/broadcom/vulkan/v3dv_descriptor_set.c 
index d3430d0..c53e93d 100644 --- a/src/broadcom/vulkan/v3dv_descriptor_set.c +++ b/src/broadcom/vulkan/v3dv_descriptor_set.c @@ -28,8 +28,8 @@ /* * For a given descriptor defined by the descriptor_set it belongs, its - * binding layout, and array_index, it returns the map region assigned to it - * from the descriptor pool bo. + * binding layout, array_index, and plane, it returns the map region assigned + * to it from the descriptor pool bo. */ static void * descriptor_bo_map(struct v3dv_device *device, @@ -47,7 +47,7 @@ descriptor_bo_map(struct v3dv_device *device, return set->pool->bo->map + set->base_offset + binding_layout->descriptor_offset + - array_index * bo_size; + array_index * binding_layout->plane_stride * bo_size; } static bool @@ -132,8 +132,11 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device, const struct v3dv_descriptor_set_binding_layout *binding_layout = &set->layout->binding[binding_number]; + + uint32_t bo_size = v3dv_X(device, descriptor_bo_size)(binding_layout->type); + assert(binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK || - v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0); + bo_size > 0); if (out_type) *out_type = binding_layout->type; @@ -143,7 +146,7 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device, struct v3dv_cl_reloc reloc = { .bo = set->pool->bo, .offset = set->base_offset + binding_layout->descriptor_offset + - array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type), + array_index * binding_layout->plane_stride * bo_size, }; return reloc; @@ -222,7 +225,7 @@ v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device, type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - reloc.offset += v3dv_X(device, combined_image_sampler_sampler_state_offset)(); + reloc.offset += v3dv_X(device, combined_image_sampler_sampler_state_offset)(map->plane[index]); return reloc; } @@ -250,7 +253,8 @@ 
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_stat assert(descriptor->image_view); struct v3dv_image *image = (struct v3dv_image *) descriptor->image_view->vk.image; - return image->mem->bo; + assert(map->plane[index] < image->plane_count); + return image->planes[map->plane[index]].mem->bo; } default: unreachable("descriptor type doesn't has a texture bo"); @@ -279,7 +283,7 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device, type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - reloc.offset += v3dv_X(device, combined_image_sampler_texture_state_offset)(); + reloc.offset += v3dv_X(device, combined_image_sampler_texture_state_offset)(map->plane[index]); return reloc; } @@ -287,8 +291,21 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device, #define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x)); static void +sha1_update_ycbcr_conversion(struct mesa_sha1 *ctx, + const struct vk_ycbcr_conversion *conversion) +{ + SHA1_UPDATE_VALUE(ctx, conversion->format); + SHA1_UPDATE_VALUE(ctx, conversion->ycbcr_model); + SHA1_UPDATE_VALUE(ctx, conversion->ycbcr_range); + SHA1_UPDATE_VALUE(ctx, conversion->mapping); + SHA1_UPDATE_VALUE(ctx, conversion->chroma_offsets); + SHA1_UPDATE_VALUE(ctx, conversion->chroma_reconstruction); +} + +static void sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx, - const struct v3dv_descriptor_set_binding_layout *layout) + const struct v3dv_descriptor_set_binding_layout *layout, + const struct v3dv_descriptor_set_layout *set_layout) { SHA1_UPDATE_VALUE(ctx, layout->type); SHA1_UPDATE_VALUE(ctx, layout->array_size); @@ -297,6 +314,18 @@ sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx, SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index); SHA1_UPDATE_VALUE(ctx, layout->descriptor_offset); SHA1_UPDATE_VALUE(ctx, layout->immutable_samplers_offset); + SHA1_UPDATE_VALUE(ctx, 
layout->plane_stride); + + if (layout->immutable_samplers_offset) { + const struct v3dv_sampler *immutable_samplers = + v3dv_immutable_samplers(set_layout, layout); + + for (unsigned i = 0; i < layout->array_size; i++) { + const struct v3dv_sampler *sampler = &immutable_samplers[i]; + if (sampler->conversion) + sha1_update_ycbcr_conversion(ctx, sampler->conversion); + } + } } static void @@ -310,7 +339,7 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count); for (uint16_t i = 0; i < layout->binding_count; i++) - sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i]); + sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i], layout); } @@ -632,6 +661,13 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, uint32_t num_bindings = 0; uint32_t immutable_sampler_count = 0; + + /* for immutable descriptors, the plane stride is the largest plane + * count of all combined image samplers. For mutable descriptors + * this is always 1 since multiplanar images are restricted to + * immutable combined image samplers. 
+ */ + uint8_t plane_stride = 1; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1); @@ -650,7 +686,15 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, if ((desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || desc_type == VK_DESCRIPTOR_TYPE_SAMPLER) && pCreateInfo->pBindings[j].pImmutableSamplers) { - immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + uint32_t descriptor_count = pCreateInfo->pBindings[j].descriptorCount; + immutable_sampler_count += descriptor_count; + + for (uint32_t i = 0; i < descriptor_count; i++) { + const VkSampler vk_sampler = + pCreateInfo->pBindings[j].pImmutableSamplers[i]; + VK_FROM_HANDLE(v3dv_sampler, sampler, vk_sampler); + plane_stride = MAX2(plane_stride, sampler->plane_count); + } } } @@ -728,6 +772,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, set_layout->binding[binding_number].array_size = binding->descriptorCount; set_layout->binding[binding_number].descriptor_index = descriptor_count; set_layout->binding[binding_number].dynamic_offset_index = dynamic_offset_count; + set_layout->binding[binding_number].plane_stride = plane_stride; if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) && @@ -740,6 +785,8 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, samplers += binding->descriptorCount; samplers_offset += sizeof(struct v3dv_sampler) * binding->descriptorCount; + + set_layout->binding[binding_number].plane_stride = plane_stride; } set_layout->shader_stages |= binding->stageFlags; @@ -754,7 +801,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, set_layout->bo_size; set_layout->bo_size += v3dv_X(device, descriptor_bo_size)(set_layout->binding[binding_number].type) * - binding->descriptorCount; + binding->descriptorCount * set_layout->binding[binding_number].plane_stride; } else { /* We align all our buffers, inline buffers too. 
We made sure to take * this account when calculating total BO size requirements at pool @@ -906,16 +953,18 @@ descriptor_set_create(struct v3dv_device *device, layout->binding[b].immutable_samplers_offset); for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { - uint32_t combined_offset = - layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? - v3dv_X(device, combined_image_sampler_sampler_state_offset)() : 0; - - void *desc_map = descriptor_bo_map(device, set, &layout->binding[b], i); - desc_map += combined_offset; - - memcpy(desc_map, - samplers[i].sampler_state, - sizeof(samplers[i].sampler_state)); + assert(samplers[i].plane_count <= V3DV_MAX_PLANE_COUNT); + for (uint8_t plane = 0; plane < samplers[i].plane_count; plane++) { + uint32_t combined_offset = + layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? + v3dv_X(device, combined_image_sampler_sampler_state_offset)(plane) : 0; + void *desc_map = + descriptor_bo_map(device, set, &layout->binding[b], i); + desc_map += combined_offset; + + memcpy(desc_map, samplers[i].sampler_state, + sizeof(samplers[i].sampler_state)); + } } } @@ -994,11 +1043,16 @@ descriptor_bo_copy(struct v3dv_device *device, uint32_t src_array_index) { assert(dst_binding_layout->type == src_binding_layout->type); + assert(src_binding_layout->plane_stride == dst_binding_layout->plane_stride); - void *dst_map = descriptor_bo_map(device, dst_set, dst_binding_layout, dst_array_index); - void *src_map = descriptor_bo_map(device, src_set, src_binding_layout, src_array_index); + void *dst_map = descriptor_bo_map(device, dst_set, dst_binding_layout, + dst_array_index); + void *src_map = descriptor_bo_map(device, src_set, src_binding_layout, + src_array_index); - memcpy(dst_map, src_map, v3dv_X(device, descriptor_bo_size)(src_binding_layout->type)); + memcpy(dst_map, src_map, + v3dv_X(device, descriptor_bo_size)(src_binding_layout->type) * + src_binding_layout->plane_stride); } static void @@ -1033,26 
+1087,39 @@ write_image_descriptor(struct v3dv_device *device, descriptor->sampler = sampler; descriptor->image_view = iview; + assert(iview || sampler); + uint8_t plane_count = iview ? iview->plane_count : sampler->plane_count; + void *desc_map = descriptor_bo_map(device, set, binding_layout, array_index); - if (iview) { - const uint32_t tex_state_index = - iview->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY || - desc_type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ? 0 : 1; - memcpy(desc_map, - iview->texture_shader_state[tex_state_index], - sizeof(iview->texture_shader_state[0])); - desc_map += v3dv_X(device, combined_image_sampler_sampler_state_offset)(); - } + for (uint8_t plane = 0; plane < plane_count; plane++) { + if (iview) { + uint32_t offset = desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? + v3dv_X(device, combined_image_sampler_texture_state_offset)(plane) : 0; - if (sampler && !binding_layout->immutable_samplers_offset) { - /* For immutable samplers this was already done as part of the - * descriptor set create, as that info can't change later - */ - memcpy(desc_map, - sampler->sampler_state, - sizeof(sampler->sampler_state)); + void *plane_desc_map = desc_map + offset; + + const uint32_t tex_state_index = + iview->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY || + desc_type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ? 0 : 1; + memcpy(plane_desc_map, + iview->planes[plane].texture_shader_state[tex_state_index], + sizeof(iview->planes[plane].texture_shader_state[0])); + } + + if (sampler && !binding_layout->immutable_samplers_offset) { + uint32_t offset = desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? 
+ v3dv_X(device, combined_image_sampler_sampler_state_offset)(plane) : 0; + + void *plane_desc_map = desc_map + offset; + /* For immutable samplers this was already done as part of the + * descriptor set create, as that info can't change later + */ + memcpy(plane_desc_map, + sampler->sampler_state, + sizeof(sampler->sampler_state)); + } } } @@ -1146,12 +1213,11 @@ v3dv_UpdateDescriptorSets(VkDevice _device, break; } case VK_DESCRIPTOR_TYPE_SAMPLER: { - /* If we are here we shouldn't be modifying a immutable sampler, - * so we don't ensure that would work or not crash. But let the - * validation layers check that - */ + /* If we are here we shouldn't be modifying an immutable sampler */ + assert(!binding_layout->immutable_samplers_offset); const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler); + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, NULL, sampler, writeset->dstArrayElement + j); @@ -1163,6 +1229,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device, case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView); + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, iview, NULL, writeset->dstArrayElement + j); @@ -1172,7 +1239,17 @@ v3dv_UpdateDescriptorSets(VkDevice _device, case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView); - V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler); + struct v3dv_sampler *sampler = NULL; + if (!binding_layout->immutable_samplers_offset) { + /* In general we ignore the sampler when updating a combined + * image sampler, but for YCbCr we kwnow that we must use + * immutable combined image samplers + */ + assert(iview->plane_count == 1); 
+ V3DV_FROM_HANDLE(v3dv_sampler, _sampler, image_info->sampler); + sampler = _sampler; + } + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, iview, sampler, writeset->dstArrayElement + j); @@ -1447,23 +1524,3 @@ v3dv_UpdateDescriptorSetWithTemplate( } } } - -VKAPI_ATTR VkResult VKAPI_CALL -v3dv_CreateSamplerYcbcrConversion( - VkDevice _device, - const VkSamplerYcbcrConversionCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSamplerYcbcrConversion *pYcbcrConversion) -{ - unreachable("Ycbcr sampler conversion is not supported"); - return VK_SUCCESS; -} - -VKAPI_ATTR void VKAPI_CALL -v3dv_DestroySamplerYcbcrConversion( - VkDevice _device, - VkSamplerYcbcrConversion YcbcrConversion, - const VkAllocationCallbacks *pAllocator) -{ - unreachable("Ycbcr sampler conversion is not supported"); -} diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 3a95857..adcc912 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -151,6 +151,9 @@ get_device_extensions(const struct v3dv_physical_device *device, .KHR_shader_float_controls = true, .KHR_shader_non_semantic_info = true, .KHR_sampler_mirror_clamp_to_edge = true, +#ifndef ANDROID + .KHR_sampler_ycbcr_conversion = true, +#endif .KHR_spirv_1_4 = true, .KHR_storage_buffer_storage_class = true, .KHR_timeline_semaphore = true, @@ -1239,7 +1242,11 @@ v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, /* FIXME: this needs support for non-constant index on UBO/SSBO */ .variablePointers = false, .protectedMemory = false, +#ifdef ANDROID .samplerYcbcrConversion = false, +#else + .samplerYcbcrConversion = true, +#endif .shaderDrawParameters = false, }; @@ -2596,14 +2603,28 @@ v3dv_InvalidateMappedMemoryRanges(VkDevice _device, static void get_image_memory_requirements(struct v3dv_image *image, + VkImageAspectFlagBits planeAspect, VkMemoryRequirements2 *pMemoryRequirements) { 
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { .memoryTypeBits = 0x1, - .alignment = image->alignment, - .size = image->size + .alignment = image->planes[0].alignment, + .size = image->non_disjoint_size }; + if (planeAspect != VK_IMAGE_ASPECT_NONE) { + assert(image->format->plane_count > 1); + /* Disjoint images should have a 0 non_disjoint_size */ + assert(!pMemoryRequirements->memoryRequirements.size); + + uint8_t plane = v3dv_image_aspect_to_plane(image, planeAspect); + + VkMemoryRequirements *mem_reqs = + &pMemoryRequirements->memoryRequirements; + mem_reqs->alignment = image->planes[plane].alignment; + mem_reqs->size = image->planes[plane].size; + } + vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { @@ -2626,7 +2647,23 @@ v3dv_GetImageMemoryRequirements2(VkDevice device, VkMemoryRequirements2 *pMemoryRequirements) { V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image); - get_image_memory_requirements(image, pMemoryRequirements); + + VkImageAspectFlagBits planeAspect = VK_IMAGE_ASPECT_NONE; + vk_foreach_struct_const(ext, pInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: { + VkImagePlaneMemoryRequirementsInfo *req = + (VkImagePlaneMemoryRequirementsInfo *) ext; + planeAspect = req->planeAspect; + break; + } + default: + v3dv_debug_ignored_stype(ext->sType); + break; + } + } + + get_image_memory_requirements(image, planeAspect, pMemoryRequirements); } VKAPI_ATTR void VKAPI_CALL @@ -2644,7 +2681,23 @@ v3dv_GetDeviceImageMemoryRequirementsKHR( v3dv_image_init(device, pInfo->pCreateInfo, NULL, &image); assert(result == VK_SUCCESS); - get_image_memory_requirements(&image, pMemoryRequirements); + /* From VkDeviceImageMemoryRequirements spec: + * + * " planeAspect is a VkImageAspectFlagBits value specifying the aspect + * corresponding to the image plane to query. 
This parameter is ignored + * unless pCreateInfo::tiling is + * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, or pCreateInfo::flags has + * VK_IMAGE_CREATE_DISJOINT_BIT set" + * + * We need to explicitly ignore that flag, or following asserts could be + * triggered. + */ + VkImageAspectFlagBits planeAspect = + pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT || + pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT ? + pInfo->planeAspect : 0; + + get_image_memory_requirements(&image, planeAspect, pMemoryRequirements); } static void @@ -2659,11 +2712,43 @@ bind_image_memory(const VkBindImageMemoryInfo *info) * the VkMemoryRequirements structure returned from a call to * vkGetImageMemoryRequirements with image" */ - assert(info->memoryOffset % image->alignment == 0); assert(info->memoryOffset < mem->bo->size); - image->mem = mem; - image->mem_offset = info->memoryOffset; + uint64_t offset = info->memoryOffset; + if (image->non_disjoint_size) { + /* We only check for plane 0 as it is the only one that actually starts + * at that offset + */ + assert(offset % image->planes[0].alignment == 0); + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + image->planes[plane].mem = mem; + image->planes[plane].mem_offset = offset; + } + } else { + const VkBindImagePlaneMemoryInfo *plane_mem_info = + vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO); + assert(plane_mem_info); + + /* + * From VkBindImagePlaneMemoryInfo spec: + * + * "If the image’s tiling is VK_IMAGE_TILING_LINEAR or + * VK_IMAGE_TILING_OPTIMAL, then planeAspect must be a single valid + * format plane for the image" + * + * + * + * "If the image’s tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, + * then planeAspect must be a single valid memory plane for the + * image" + * + * So planeAspect should only refer to one plane. 
+ */ + uint8_t plane = v3dv_plane_from_aspect(plane_mem_info->planeAspect); + assert(offset % image->planes[plane].alignment == 0); + image->planes[plane].mem = mem; + image->planes[plane].mem_offset = offset; + } } VKAPI_ATTR VkResult VKAPI_CALL @@ -2680,11 +2765,13 @@ v3dv_BindImageMemory2(VkDevice _device, struct v3dv_image *swapchain_image = v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain, swapchain_info->imageIndex); + /* Making the assumption that swapchain images are a single plane */ + assert(swapchain_image->plane_count == 1); VkBindImageMemoryInfo swapchain_bind = { .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, .image = pBindInfos[i].image, - .memory = v3dv_device_memory_to_handle(swapchain_image->mem), - .memoryOffset = swapchain_image->mem_offset, + .memory = v3dv_device_memory_to_handle(swapchain_image->planes[0].mem), + .memoryOffset = swapchain_image->planes[0].mem_offset, }; bind_image_memory(&swapchain_bind); } else @@ -2957,6 +3044,8 @@ v3dv_CreateSampler(VkDevice _device, if (!sampler) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + sampler->plane_count = 1; + sampler->compare_enable = pCreateInfo->compareEnable; sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates; @@ -2964,6 +3053,20 @@ v3dv_CreateSampler(VkDevice _device, vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + const VkSamplerYcbcrConversionInfo *ycbcr_conv_info = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + + const struct vk_format_ycbcr_info *ycbcr_info = NULL; + + if (ycbcr_conv_info) { + VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, ycbcr_conv_info->conversion); + ycbcr_info = vk_format_get_ycbcr_info(conversion->format); + if (ycbcr_info) { + sampler->plane_count = ycbcr_info->n_planes; + sampler->conversion = conversion; + } + } + v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info); *pSampler = v3dv_sampler_to_handle(sampler); diff --git 
a/src/broadcom/vulkan/v3dv_formats.c b/src/broadcom/vulkan/v3dv_formats.c index e07e685..d1d25c7 100644 --- a/src/broadcom/vulkan/v3dv_formats.c +++ b/src/broadcom/vulkan/v3dv_formats.c @@ -30,7 +30,7 @@ #include "vulkan/wsi/wsi_common.h" const uint8_t * -v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f) +v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f, uint8_t plane) { const struct v3dv_format *vf = v3dv_X(device, get_format)(f); static const uint8_t fallback[] = {0, 1, 2, 3}; @@ -38,7 +38,7 @@ v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f) if (!vf) return fallback; - return vf->swizzle; + return vf->planes[plane].swizzle; } bool @@ -82,6 +82,9 @@ v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle) * involved). In these cases, it is safe to choose any format supported by * the TFU so long as it has the same texel size, which allows us to use the * TFU paths with formats that are not TFU supported otherwise. + * + * Even when copying multi-plane images, we are copying per-plane, so the + * compatible TFU format will be single-plane. 
*/ const struct v3dv_format * v3dv_get_compatible_tfu_format(struct v3dv_device *device, @@ -102,20 +105,18 @@ v3dv_get_compatible_tfu_format(struct v3dv_device *device, *out_vk_format = vk_format; const struct v3dv_format *format = v3dv_X(device, get_format)(vk_format); - assert(v3dv_X(device, tfu_supports_tex_format)(format->tex_type)); + assert(format->plane_count == 1); + assert(v3dv_X(device, tfu_supports_tex_format)(format->planes[0].tex_type)); return format; } static VkFormatFeatureFlags2 -image_format_features(struct v3dv_physical_device *pdevice, - VkFormat vk_format, - const struct v3dv_format *v3dv_format, - VkImageTiling tiling) +image_format_plane_features(struct v3dv_physical_device *pdevice, + VkFormat vk_format, + const struct v3dv_format_plane *v3dv_format, + VkImageTiling tiling) { - if (!v3dv_format || !v3dv_format->supported) - return 0; - const VkImageAspectFlags aspects = vk_format_aspects(vk_format); const VkImageAspectFlags zs_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | @@ -146,16 +147,12 @@ image_format_features(struct v3dv_physical_device *pdevice, flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_2_BLIT_SRC_BIT; - if (v3dv_format->supports_filtering) - flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; } if (v3dv_format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { flags |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_BLIT_DST_BIT; - if (v3dv_X(pdevice, format_supports_blending)(v3dv_format)) - flags |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT; } else if (aspects & zs_aspects) { flags |= VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_2_BLIT_DST_BIT; @@ -193,12 +190,78 @@ image_format_features(struct v3dv_physical_device *pdevice, } static VkFormatFeatureFlags2 +image_format_features(struct v3dv_physical_device *pdevice, + VkFormat vk_format, + const struct v3dv_format *v3dv_format, + VkImageTiling tiling) +{ + if (!v3dv_format || 
!v3dv_format->plane_count) + return 0; + + VkFormatFeatureFlags2 flags = ~0ull; + for (uint8_t plane = 0; + flags && plane < v3dv_format->plane_count; + plane++) { + VkFormat plane_format = vk_format_get_plane_format(vk_format, plane); + + flags &= image_format_plane_features(pdevice, + plane_format, + &v3dv_format->planes[plane], + tiling); + } + + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(vk_format); + + if (ycbcr_info) { + assert(v3dv_format->plane_count == ycbcr_info->n_planes); + + flags |= VK_FORMAT_FEATURE_2_DISJOINT_BIT; + + if (flags & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT) { + flags |= VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT; + for (unsigned p = 0; p < ycbcr_info->n_planes; p++) { + if (ycbcr_info->planes[p].denominator_scales[0] > 1 || + ycbcr_info->planes[p].denominator_scales[1] > 1) { + flags |= VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT; + break; + } + } + } + + /* FIXME: in the future we should be able to support BLIT_SRC via the + * blit_shader path + */ + const VkFormatFeatureFlags2 disallowed_ycbcr_image_features = + VK_FORMAT_FEATURE_2_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_2_BLIT_DST_BIT | + VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT | + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT; + + flags &= ~disallowed_ycbcr_image_features; + } + + if (flags & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT && + v3dv_format->supports_filtering) { + flags |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + if (flags & VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT && + v3dv_X(pdevice, format_supports_blending)(v3dv_format)) { + flags |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT; + } + + return flags; +} + +static VkFormatFeatureFlags2 buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format) { - if (!v3dv_format || !v3dv_format->supported) + if (!v3dv_format) return 0; - if (!v3dv_format->supported) + if (v3dv_format->plane_count != 1) return 0; /* We 
probably only want to support buffer formats that have a @@ -215,7 +278,7 @@ buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && desc->is_array) { flags |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT; - if (v3dv_format->tex_type != TEXTURE_DATA_FORMAT_NO) { + if (v3dv_format->planes[0].tex_type != TEXTURE_DATA_FORMAT_NO) { flags |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT; } @@ -409,9 +472,8 @@ get_image_format_properties( if (view_usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT)) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT)) goto unsupported; - } /* Sampling of raster depth/stencil images is not supported. Since 1D * images are always raster, even if the user requested optimal tiling, @@ -454,7 +516,8 @@ get_image_format_properties( pImageFormatProperties->maxExtent.width = V3D_MAX_IMAGE_DIMENSION; pImageFormatProperties->maxExtent.height = V3D_MAX_IMAGE_DIMENSION; pImageFormatProperties->maxExtent.depth = 1; - pImageFormatProperties->maxArrayLayers = V3D_MAX_ARRAY_LAYERS; + pImageFormatProperties->maxArrayLayers = + v3dv_format->plane_count == 1 ? V3D_MAX_ARRAY_LAYERS : 1; pImageFormatProperties->maxMipLevels = V3D_MAX_MIP_LEVELS; break; case VK_IMAGE_TYPE_3D: @@ -499,8 +562,42 @@ get_image_format_properties( if (tiling == VK_IMAGE_TILING_LINEAR) pImageFormatProperties->maxMipLevels = 1; + /* From the Vulkan 1.2 spec, section 12.3. Images, VkImageCreateInfo structure: + * + * "Images created with one of the formats that require a sampler Y′CBCR + * conversion, have further restrictions on their limits and + * capabilities compared to images created with other formats. 
Creation + * of images with a format requiring Y′CBCR conversion may not be + * supported unless other parameters meet all of the constraints: + * + * * imageType is VK_IMAGE_TYPE_2D + * * mipLevels is 1 + * * arrayLayers is 1, unless the ycbcrImageArrays feature is enabled, or + * otherwise indicated by VkImageFormatProperties::maxArrayLayers, as + * returned by vkGetPhysicalDeviceImageFormatProperties + * * samples is VK_SAMPLE_COUNT_1_BIT + * + * Implementations may support additional limits and capabilities beyond + * those listed above." + * + * We don't provide such additional limits, so we set those limits, or just + * return unsupported. + */ + if (vk_format_get_plane_count(info->format) > 1) { + if (info->type != VK_IMAGE_TYPE_2D) + goto unsupported; + pImageFormatProperties->maxMipLevels = 1; + pImageFormatProperties->maxArrayLayers = 1; + pImageFormatProperties->sampleCounts = VK_SAMPLE_COUNT_1_BIT; + } + pImageFormatProperties->maxResourceSize = 0xffffffff; /* 32-bit allocation */ + if (pYcbcrImageFormatProperties) { + pYcbcrImageFormatProperties->combinedImageSamplerDescriptorCount = + vk_format_get_plane_count(info->format); + } + return VK_SUCCESS; unsupported: @@ -561,6 +658,7 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_format_mod_info = NULL; VkExternalImageFormatProperties *external_props = NULL; + VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL; VkImageTiling tiling = base_info->tiling; /* Extract input structs */ @@ -600,6 +698,9 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: external_props = (void *) s; break; + case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: + ycbcr_props = (void *) s; + break; default: v3dv_debug_ignored_stype(s->sType); break; @@ 
-608,7 +709,8 @@ v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, VkResult result = get_image_format_properties(physical_device, base_info, tiling, - &base_props->imageFormatProperties, NULL); + &base_props->imageFormatProperties, + ycbcr_props); if (result != VK_SUCCESS) goto done; diff --git a/src/broadcom/vulkan/v3dv_image.c b/src/broadcom/vulkan/v3dv_image.c index 5bceee8..5350188 100644 --- a/src/broadcom/vulkan/v3dv_image.c +++ b/src/broadcom/vulkan/v3dv_image.c @@ -71,12 +71,15 @@ v3d_get_ub_pad(uint32_t cpp, uint32_t height) } static void -v3d_setup_slices(struct v3dv_image *image) +v3d_setup_plane_slices(struct v3dv_image *image, uint8_t plane, + uint32_t plane_offset) { - assert(image->cpp > 0); + assert(image->planes[plane].cpp > 0); + /* Texture Base Adress needs to be 64-byte aligned */ + assert(plane_offset % 64 == 0); - uint32_t width = image->vk.extent.width; - uint32_t height = image->vk.extent.height; + uint32_t width = image->planes[plane].width; + uint32_t height = image->planes[plane].height; uint32_t depth = image->vk.extent.depth; /* Note that power-of-two padding is based on level 1. 
These are not @@ -88,8 +91,8 @@ v3d_setup_slices(struct v3dv_image *image) uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1)); uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1)); - uint32_t utile_w = v3d_utile_width(image->cpp); - uint32_t utile_h = v3d_utile_height(image->cpp); + uint32_t utile_w = v3d_utile_width(image->planes[plane].cpp); + uint32_t utile_h = v3d_utile_height(image->planes[plane].cpp); uint32_t uif_block_w = utile_w * 2; uint32_t uif_block_h = utile_h * 2; @@ -106,9 +109,9 @@ v3d_setup_slices(struct v3dv_image *image) assert(depth > 0); assert(image->vk.mip_levels >= 1); - uint32_t offset = 0; + uint32_t offset = plane_offset; for (int32_t i = image->vk.mip_levels - 1; i >= 0; i--) { - struct v3d_resource_slice *slice = &image->slices[i]; + struct v3d_resource_slice *slice = &image->planes[plane].slices[i]; uint32_t level_width, level_height, level_depth; if (i < 2) { @@ -135,7 +138,7 @@ v3d_setup_slices(struct v3dv_image *image) if (!image->tiled) { slice->tiling = V3D_TILING_RASTER; if (image->vk.image_type == VK_IMAGE_TYPE_1D) - level_width = align(level_width, 64 / image->cpp); + level_width = align(level_width, 64 / image->planes[plane].cpp); } else { if ((i != 0 || !uif_top) && (level_width <= utile_w || level_height <= utile_h)) { @@ -157,7 +160,8 @@ v3d_setup_slices(struct v3dv_image *image) level_width = align(level_width, 4 * uif_block_w); level_height = align(level_height, uif_block_h); - slice->ub_pad = v3d_get_ub_pad(image->cpp, level_height); + slice->ub_pad = v3d_get_ub_pad(image->planes[plane].cpp, + level_height); level_height += slice->ub_pad * uif_block_h; /* If the padding set us to to be aligned to the page cache size, @@ -174,12 +178,13 @@ v3d_setup_slices(struct v3dv_image *image) } slice->offset = offset; - slice->stride = level_width * image->cpp; + slice->stride = level_width * image->planes[plane].cpp; slice->padded_height = level_height; if (slice->tiling == V3D_TILING_UIF_NO_XOR || 
slice->tiling == V3D_TILING_UIF_XOR) { slice->padded_height_of_output_image_in_uif_blocks = - slice->padded_height / (2 * v3d_utile_height(image->cpp)); + slice->padded_height / + (2 * v3d_utile_height(image->planes[plane].cpp)); } slice->size = level_height * slice->stride; @@ -199,7 +204,7 @@ v3d_setup_slices(struct v3dv_image *image) offset += slice_total_size; } - image->size = offset; + image->planes[plane].size = offset - plane_offset; /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only * needs to be aligned to utile boundaries. Since tiles are laid out @@ -214,18 +219,20 @@ v3d_setup_slices(struct v3dv_image *image) * used for transfer. */ if (image->tiled) { - image->alignment = 4096; + image->planes[plane].alignment = 4096; } else { - image->alignment = - (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ? 64 : image->cpp; + image->planes[plane].alignment = + (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ? 64 : image->planes[plane].cpp; } uint32_t align_offset = - align(image->slices[0].offset, image->alignment) - image->slices[0].offset; + align(image->planes[plane].slices[0].offset, + image->planes[plane].alignment) - + image->planes[plane].slices[0].offset; if (align_offset) { - image->size += align_offset; + image->planes[plane].size += align_offset; for (int i = 0; i < image->vk.mip_levels; i++) - image->slices[i].offset += align_offset; + image->planes[plane].slices[i].offset += align_offset; } /* Arrays and cube textures have a stride which is the distance from @@ -233,23 +240,43 @@ v3d_setup_slices(struct v3dv_image *image) * we need to program the stride between slices of miplevel 0. 
*/ if (image->vk.image_type != VK_IMAGE_TYPE_3D) { - image->cube_map_stride = - align(image->slices[0].offset + image->slices[0].size, 64); - image->size += image->cube_map_stride * (image->vk.array_layers - 1); + image->planes[plane].cube_map_stride = + align(image->planes[plane].slices[0].offset + + image->planes[plane].slices[0].size, 64); + image->planes[plane].size += image->planes[plane].cube_map_stride * + (image->vk.array_layers - 1); } else { - image->cube_map_stride = image->slices[0].size; + image->planes[plane].cube_map_stride = image->planes[plane].slices[0].size; + } +} + +static void +v3d_setup_slices(struct v3dv_image *image, bool disjoint) +{ + if (disjoint && image->plane_count == 1) + disjoint = false; + + uint32_t offset = 0; + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + offset = disjoint ? 0 : offset; + v3d_setup_plane_slices(image, plane, offset); + offset += align(image->planes[plane].size, 64); } + + image->non_disjoint_size = disjoint ? 0 : offset; } uint32_t -v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer) +v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer, + uint8_t plane) { - const struct v3d_resource_slice *slice = &image->slices[level]; + const struct v3d_resource_slice *slice = &image->planes[plane].slices[level]; if (image->vk.image_type == VK_IMAGE_TYPE_3D) - return image->mem_offset + slice->offset + layer * slice->size; + return image->planes[plane].mem_offset + slice->offset + layer * slice->size; else - return image->mem_offset + slice->offset + layer * image->cube_map_stride; + return image->planes[plane].mem_offset + slice->offset + + layer * image->planes[plane].cube_map_stride; } VkResult @@ -322,13 +349,35 @@ v3dv_image_init(struct v3dv_device *device, const struct v3dv_format *format = v3dv_X(device, get_format)(pCreateInfo->format); - v3dv_assert(format != NULL && format->supported); + v3dv_assert(format != NULL && format->plane_count); 
assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT || pCreateInfo->samples == VK_SAMPLE_COUNT_4_BIT); image->format = format; - image->cpp = vk_format_get_blocksize(image->vk.format); + image->plane_count = vk_format_get_plane_count(pCreateInfo->format); + + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(image->vk.format); + + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + VkFormat plane_format = + vk_format_get_plane_format(image->vk.format, plane); + image->planes[plane].cpp = + vk_format_get_blocksize(plane_format); + image->planes[plane].vk_format = plane_format; + + image->planes[plane].width = image->vk.extent.width; + image->planes[plane].height = image->vk.extent.height; + + if (ycbcr_info) { + image->planes[plane].width /= + ycbcr_info->planes[plane].denominator_scales[0]; + + image->planes[plane].height /= + ycbcr_info->planes[plane].denominator_scales[1]; + } + } image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL || (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && modifier != DRM_FORMAT_MOD_LINEAR); @@ -342,12 +391,16 @@ v3dv_image_init(struct v3dv_device *device, */ image->vk.create_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; - v3d_setup_slices(image); + bool disjoint = image->vk.create_flags & VK_IMAGE_CREATE_DISJOINT_BIT; + v3d_setup_slices(image, disjoint); #ifdef ANDROID if (native_buffer != NULL) { - image->slices[0].stride = native_buf_stride; - image->slices[0].size = image->size = native_buf_size; + assert(image->plane_count == 1); + image->planes[0].slices[0].stride = native_buf_stride; + image->non_disjoint_size = + image->planes[0].slices[0].size = + image->planes[0].size = native_buf_size; VkResult result = v3dv_import_native_buffer_fd(v3dv_device_to_handle(device), native_buf_fd, pAllocator, @@ -461,14 +514,28 @@ v3dv_GetImageSubresourceLayout(VkDevice device, { V3DV_FROM_HANDLE(v3dv_image, image, _image); + uint8_t plane = v3dv_plane_from_aspect(subresource->aspectMask); const struct 
v3d_resource_slice *slice = - &image->slices[subresource->mipLevel]; + &image->planes[plane].slices[subresource->mipLevel]; + + /* About why the offset below works for both disjoint and non-disjoint + * cases, from the Vulkan spec: + * + * "If the image is disjoint, then the offset is relative to the base + * address of the plane." + * + * "If the image is non-disjoint, then the offset is relative to the base + * address of the image." + * + * In our case, the per-plane mem_offset for non-disjoint images is the + * same for all planes and matches the base address of the image. + */ layout->offset = - v3dv_layer_offset(image, subresource->mipLevel, subresource->arrayLayer) - - image->mem_offset; + v3dv_layer_offset(image, subresource->mipLevel, subresource->arrayLayer, + plane) - image->planes[plane].mem_offset; layout->rowPitch = slice->stride; - layout->depthPitch = image->cube_map_stride; - layout->arrayPitch = image->cube_map_stride; + layout->depthPitch = image->planes[plane].cube_map_stride; + layout->arrayPitch = image->planes[plane].cube_map_stride; if (image->vk.image_type != VK_IMAGE_TYPE_3D) { layout->size = slice->size; @@ -483,7 +550,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device, layout->size = slice->size * image->vk.extent.depth; } else { const struct v3d_resource_slice *prev_slice = - &image->slices[subresource->mipLevel - 1]; + &image->planes[plane].slices[subresource->mipLevel - 1]; layout->size = prev_slice->offset - slice->offset; } } @@ -501,8 +568,11 @@ v3dv_DestroyImage(VkDevice _device, return; #ifdef ANDROID + assert(image->plane_count == 1); if (image->is_native_buffer_memory) - v3dv_FreeMemory(_device, v3dv_device_memory_to_handle(image->mem), pAllocator); + v3dv_FreeMemory(_device, + v3dv_device_memory_to_handle(image->planes[0].mem), + pAllocator); #endif vk_image_destroy(&device->vk, pAllocator, &image->vk); @@ -535,10 +605,31 @@ create_image_view(struct v3dv_device *device, if (iview == NULL) return vk_error(device, 
VK_ERROR_OUT_OF_HOST_MEMORY); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + const VkImageAspectFlagBits any_plane_aspect = + VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT; + + if (image->vk.aspects & any_plane_aspect) { + assert((image->vk.aspects & ~any_plane_aspect) == 0); + iview->plane_count = 0; + static const VkImageAspectFlagBits plane_aspects[]= { + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT + }; + for (uint8_t plane = 0; plane < V3DV_MAX_PLANE_COUNT; plane++) { + if (iview->vk.aspects & plane_aspects[plane]) + iview->planes[iview->plane_count++].image_plane = plane; + } + } else { + iview->plane_count = 1; + iview->planes[0].image_plane = 0; + } + /* At this point we should have at least one plane */ + assert(iview->plane_count > 0); - iview->offset = v3dv_layer_offset(image, iview->vk.base_mip_level, - iview->vk.base_array_layer); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; /* If we have D24S8 format but the view only selects the stencil aspect * we want to re-interpret the format as RGBA8_UINT, then map our stencil @@ -567,22 +658,32 @@ create_image_view(struct v3dv_device *device, iview->vk.view_format = format; iview->format = v3dv_X(device, get_format)(format); - assert(iview->format && iview->format->supported); + assert(iview->format && iview->format->plane_count); - if (vk_format_is_depth_or_stencil(iview->vk.view_format)) { - iview->internal_type = - v3dv_X(device, get_internal_depth_type)(iview->vk.view_format); - } else { - v3dv_X(device, get_internal_type_bpp_for_output_format) - (iview->format->rt_type, &iview->internal_type, &iview->internal_bpp); - } + for (uint8_t plane = 0; plane < iview->plane_count; plane++) { + iview->planes[plane].offset = v3dv_layer_offset(image, + iview->vk.base_mip_level, + iview->vk.base_array_layer, + plane); - const uint8_t *format_swizzle = 
v3dv_get_format_swizzle(device, format); - util_format_compose_swizzles(format_swizzle, image_view_swizzle, - iview->swizzle); + if (vk_format_is_depth_or_stencil(iview->vk.view_format)) { + iview->planes[plane].internal_type = + v3dv_X(device, get_internal_depth_type)(iview->vk.view_format); + } else { + v3dv_X(device, get_internal_type_bpp_for_output_format) + (iview->format->planes[plane].rt_type, + &iview->planes[plane].internal_type, + &iview->planes[plane].internal_bpp); + } - iview->swap_rb = v3dv_format_swizzle_needs_rb_swap(format_swizzle); - iview->channel_reverse = v3dv_format_swizzle_needs_reverse(format_swizzle); + const uint8_t *format_swizzle = + v3dv_get_format_swizzle(device, format, plane); + util_format_compose_swizzles(format_swizzle, image_view_swizzle, + iview->planes[plane].swizzle); + + iview->planes[plane].swap_rb = v3dv_format_swizzle_needs_rb_swap(format_swizzle); + iview->planes[plane].channel_reverse = v3dv_format_swizzle_needs_reverse(format_swizzle); + } v3dv_X(device, pack_texture_shader_state)(device, iview); @@ -657,8 +758,10 @@ v3dv_CreateBufferView(VkDevice _device, view->vk_format = pCreateInfo->format; view->format = v3dv_X(device, get_format)(view->vk_format); + /* We don't support multi-plane formats for buffer views */ + assert(view->format->plane_count == 1); v3dv_X(device, get_internal_type_bpp_for_output_format) - (view->format->rt_type, &view->internal_type, &view->internal_bpp); + (view->format->planes[0].rt_type, &view->internal_type, &view->internal_bpp); if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT || buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c index 7bc76f2..38c7b44 100644 --- a/src/broadcom/vulkan/v3dv_meta_clear.c +++ b/src/broadcom/vulkan/v3dv_meta_clear.c @@ -67,7 +67,13 @@ clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, { const VkOffset3D origin = { 0, 0, 0 }; VkFormat fb_format; - if 
(!v3dv_meta_can_use_tlb(image, &origin, &fb_format)) + + /* From vkCmdClearColorImage spec: + * "image must not use any of the formats that require a sampler YCBCR + * conversion" + */ + assert(image->plane_count == 1); + if (!v3dv_meta_can_use_tlb(image, 0, &origin, &fb_format)) return false; uint32_t internal_type, internal_bpp; diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c index ac00eaa..07b8cdf 100644 --- a/src/broadcom/vulkan/v3dv_meta_copy.c +++ b/src/broadcom/vulkan/v3dv_meta_copy.c @@ -350,18 +350,22 @@ get_compatible_tlb_format(VkFormat format) /** * Checks if we can implement an image copy or clear operation using the TLB * hardware. + * + * For tlb copies we are doing a per-plane copy, so for multi-plane formats, + * the compatible format will be single-plane. */ bool v3dv_meta_can_use_tlb(struct v3dv_image *image, + uint8_t plane, const VkOffset3D *offset, VkFormat *compat_format) { if (offset->x != 0 || offset->y != 0) return false; - if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { + if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { if (compat_format) - *compat_format = image->vk.format; + *compat_format = image->planes[plane].vk_format; return true; } @@ -369,9 +373,11 @@ v3dv_meta_can_use_tlb(struct v3dv_image *image, * a compatible format instead. 
*/ if (compat_format) { - *compat_format = get_compatible_tlb_format(image->vk.format); - if (*compat_format != VK_FORMAT_UNDEFINED) + *compat_format = get_compatible_tlb_format(image->planes[plane].vk_format); + if (*compat_format != VK_FORMAT_UNDEFINED) { + assert(vk_format_get_plane_count(*compat_format) == 1); return true; + } } return false; @@ -394,7 +400,10 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer, const VkBufferImageCopy2 *region) { VkFormat fb_format; - if (!v3dv_meta_can_use_tlb(image, ®ion->imageOffset, &fb_format)) + uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask); + assert(plane < image->plane_count); + + if (!v3dv_meta_can_use_tlb(image, plane, ®ion->imageOffset, &fb_format)) return false; uint32_t internal_type, internal_bpp; @@ -415,8 +424,10 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer, return true; /* Handle copy from compressed format using a compatible format */ - const uint32_t block_w = vk_format_get_blockwidth(image->vk.format); - const uint32_t block_h = vk_format_get_blockheight(image->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(image->planes[plane].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(image->planes[plane].vk_format); const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); @@ -461,21 +472,30 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, bool handled = false; /* This path uses a shader blit which doesn't support linear images. Return - * early to avoid all te heavy lifting in preparation for the blit_shader() - * call that is bound to fail in that scenario. + * early to avoid all the heavy lifting in preparation for the + * blit_shader() call that is bound to fail in that scenario. 
*/ if (image->vk.tiling == VK_IMAGE_TILING_LINEAR && image->vk.image_type != VK_IMAGE_TYPE_1D) { return handled; } + VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask; + /* For multi-planar images we copy one plane at a time using an image alias + * with a color aspect for each plane. + */ + if (image->plane_count > 1) + dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + + VkImageAspectFlags src_copy_aspect = region->imageSubresource.aspectMask; + uint8_t plane = v3dv_plane_from_aspect(src_copy_aspect); + assert(plane < image->plane_count); + /* Generally, the bpp of the data in the buffer matches that of the * source image. The exception is the case where we are copying * stencil (8bpp) to a combined d24s8 image (32bpp). */ - uint32_t buffer_bpp = image->cpp; - - VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask; + uint32_t buffer_bpp = image->planes[plane].cpp; /* Because we are going to implement the copy as a blit, we need to create * a linear image from the destination buffer and we also want our blit @@ -498,22 +518,23 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, }; switch (buffer_bpp) { case 16: - assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); + assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); dst_format = VK_FORMAT_R32G32B32A32_UINT; src_format = dst_format; break; case 8: - assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); + assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); dst_format = VK_FORMAT_R16G16B16A16_UINT; src_format = dst_format; break; case 4: - switch (copy_aspect) { + switch (dst_copy_aspect) { case VK_IMAGE_ASPECT_COLOR_BIT: src_format = VK_FORMAT_R8G8B8A8_UINT; dst_format = VK_FORMAT_R8G8B8A8_UINT; break; case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(image->plane_count == 1); assert(image->vk.format == VK_FORMAT_D32_SFLOAT || image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32); @@ -538,7 +559,8 @@ 
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, } break; case VK_IMAGE_ASPECT_STENCIL_BIT: - assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT); + assert(image->plane_count == 1); + assert(dst_copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT); assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT); /* Copying from S8D24. We want to write 8-bit stencil values only, * so adjust the buffer bpp for that. Since the hardware stores stencil @@ -554,13 +576,13 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, }; break; case 2: - assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT || - copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT); + assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT || + dst_copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT); dst_format = VK_FORMAT_R16_UINT; src_format = dst_format; break; case 1: - assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); + assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT); dst_format = VK_FORMAT_R8_UINT; src_format = dst_format; break; @@ -575,7 +597,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, */ assert(vk_format_is_color(src_format)); assert(vk_format_is_color(dst_format)); - copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT; /* We should be able to handle the blit if we got this far */ handled = true; @@ -593,8 +615,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, buf_height = region->bufferImageHeight; /* If the image is compressed, the bpp refers to blocks, not pixels */ - uint32_t block_width = vk_format_get_blockwidth(image->vk.format); - uint32_t block_height = vk_format_get_blockheight(image->vk.format); + uint32_t block_width = + vk_format_get_blockwidth(image->planes[plane].vk_format); + uint32_t block_height = + vk_format_get_blockheight(image->planes[plane].vk_format); buf_width = buf_width / block_width; buf_height = buf_height / block_height; @@ -618,6 +642,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, 
struct v3dv_device *device = cmd_buffer->device; VkDevice _device = v3dv_device_to_handle(device); if (vk_format_is_compressed(image->vk.format)) { + assert(image->plane_count == 1); VkImage uiview; VkImageCreateInfo uiview_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -643,8 +668,8 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, result = vk_common_BindImageMemory(_device, uiview, - v3dv_device_memory_to_handle(image->mem), - image->mem_offset); + v3dv_device_memory_to_handle(image->planes[plane].mem), + image->planes[plane].mem_offset); if (result != VK_SUCCESS) return handled; @@ -700,7 +725,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, const VkImageBlit2 blit_region = { .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2, .srcSubresource = { - .aspectMask = copy_aspect, + .aspectMask = src_copy_aspect, .mipLevel = region->imageSubresource.mipLevel, .baseArrayLayer = region->imageSubresource.baseArrayLayer + i, .layerCount = 1, @@ -720,7 +745,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, }, }, .dstSubresource = { - .aspectMask = copy_aspect, + .aspectMask = dst_copy_aspect, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1, @@ -765,13 +790,16 @@ v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, cmd_buffer->state.is_transfer = true; for (uint32_t i = 0; i < info->regionCount; i++) { - if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i])) + const VkBufferImageCopy2 *region = &info->pRegions[i]; + + if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region)) continue; - if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i])) + + if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region)) continue; + unreachable("Unsupported image to buffer copy."); } - cmd_buffer->state.is_transfer = false; } @@ -797,7 +825,7 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | 
VK_IMAGE_ASPECT_STENCIL_BIT; if (region->dstSubresource.aspectMask != ds_aspects) - return false; + return false; } /* Don't handle copies between uncompressed and compressed formats for now. @@ -822,9 +850,14 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, if (region->dstOffset.x != 0 || region->dstOffset.y != 0) return false; + uint8_t src_plane = + v3dv_plane_from_aspect(region->srcSubresource.aspectMask); + uint8_t dst_plane = + v3dv_plane_from_aspect(region->dstSubresource.aspectMask); + const uint32_t dst_mip_level = region->dstSubresource.mipLevel; - uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level); - uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level); + uint32_t dst_width = u_minify(dst->planes[dst_plane].width, dst_mip_level); + uint32_t dst_height = u_minify(dst->planes[dst_plane].height, dst_mip_level); if (region->extent.width != dst_width || region->extent.height != dst_height) return false; @@ -834,8 +867,10 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, * members represent the texel dimensions of the source image and not * the destination." */ - const uint32_t block_w = vk_format_get_blockwidth(src->vk.format); - const uint32_t block_h = vk_format_get_blockheight(src->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(src->planes[src_plane].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(src->planes[src_plane].vk_format); uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); @@ -859,10 +894,10 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, * the underlying pixel data according to its format, we can always choose * to use compatible formats that are supported with the TFU unit. 
*/ - assert(dst->cpp == src->cpp); + assert(dst->planes[dst_plane].cpp == src->planes[src_plane].cpp); const struct v3dv_format *format = v3dv_get_compatible_tfu_format(cmd_buffer->device, - dst->cpp, NULL); + dst->planes[dst_plane].cpp, NULL); /* Emit a TFU job for each layer to blit */ const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ? @@ -876,29 +911,32 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, region->dstSubresource.baseArrayLayer : region->dstOffset.z; for (uint32_t i = 0; i < layer_count; i++) { const uint32_t dst_offset = - dst->mem->bo->offset + - v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i); + dst->planes[dst_plane].mem->bo->offset + + v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i, dst_plane); const uint32_t src_offset = - src->mem->bo->offset + - v3dv_layer_offset(src, src_mip_level, base_src_layer + i); + src->planes[src_plane].mem->bo->offset + + v3dv_layer_offset(src, src_mip_level, base_src_layer + i, src_plane); - const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; - const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; + const struct v3d_resource_slice *dst_slice = + &dst->planes[dst_plane].slices[dst_mip_level]; + const struct v3d_resource_slice *src_slice = + &src->planes[src_plane].slices[src_mip_level]; v3dv_X(cmd_buffer->device, meta_emit_tfu_job)( cmd_buffer, - dst->mem->bo->handle, + dst->planes[dst_plane].mem->bo->handle, dst_offset, dst_slice->tiling, dst_slice->padded_height, - dst->cpp, - src->mem->bo->handle, + dst->planes[dst_plane].cpp, + src->planes[src_plane].mem->bo->handle, src_offset, src_slice->tiling, src_slice->tiling == V3D_TILING_RASTER ? 
src_slice->stride : src_slice->padded_height, - src->cpp, - width, height, format); + src->planes[src_plane].cpp, + /* All compatible TFU formats are single-plane */ + width, height, &format->planes[0]); } return true; @@ -914,11 +952,17 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *src, const VkImageCopy2 *region) { + uint8_t src_plane = + v3dv_plane_from_aspect(region->srcSubresource.aspectMask); + assert(src_plane < src->plane_count); + uint8_t dst_plane = + v3dv_plane_from_aspect(region->dstSubresource.aspectMask); + assert(dst_plane < dst->plane_count); + VkFormat fb_format; - if (!v3dv_meta_can_use_tlb(src, ®ion->srcOffset, &fb_format) || - !v3dv_meta_can_use_tlb(dst, ®ion->dstOffset, &fb_format)) { + if (!v3dv_meta_can_use_tlb(src, src_plane, ®ion->srcOffset, &fb_format) || + !v3dv_meta_can_use_tlb(dst, dst_plane, ®ion->dstOffset, &fb_format)) return false; - } /* From the Vulkan spec, VkImageCopy valid usage: * @@ -926,7 +970,8 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, * dstImage has a multi-planar image format then the aspectMask member * of srcSubresource and dstSubresource must match." 
*/ - assert(region->dstSubresource.aspectMask == + assert(src->plane_count != 1 || dst->plane_count != 1 || + region->dstSubresource.aspectMask == region->srcSubresource.aspectMask); uint32_t internal_type, internal_bpp; v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects) @@ -956,8 +1001,10 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, return true; /* Handle copy to compressed image using compatible format */ - const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format); - const uint32_t block_h = vk_format_get_blockheight(dst->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(dst->planes[dst_plane].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(dst->planes[dst_plane].vk_format); const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); @@ -997,6 +1044,8 @@ create_image_alias(struct v3dv_cmd_buffer *cmd_buffer, VkFormat format) { assert(!vk_format_is_compressed(format)); + /* We don't support ycbcr compressed formats */ + assert(src->plane_count == 1); VkDevice _device = v3dv_device_to_handle(cmd_buffer->device); @@ -1025,8 +1074,8 @@ create_image_alias(struct v3dv_cmd_buffer *cmd_buffer, } struct v3dv_image *image = v3dv_image_from_handle(_image); - image->mem = src->mem; - image->mem_offset = src->mem_offset; + image->planes[0].mem = src->planes[0].mem; + image->planes[0].mem_offset = src->planes[0].mem_offset; return image; } @@ -1044,10 +1093,21 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, src->vk.image_type != VK_IMAGE_TYPE_1D) return false; - const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format); - const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format); - const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format); - const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format); + uint8_t src_plane = + 
v3dv_plane_from_aspect(region->srcSubresource.aspectMask); + assert(src_plane < src->plane_count); + uint8_t dst_plane = + v3dv_plane_from_aspect(region->dstSubresource.aspectMask); + assert(dst_plane < dst->plane_count); + + const uint32_t src_block_w = + vk_format_get_blockwidth(src->planes[src_plane].vk_format); + const uint32_t src_block_h = + vk_format_get_blockheight(src->planes[src_plane].vk_format); + const uint32_t dst_block_w = + vk_format_get_blockwidth(dst->planes[dst_plane].vk_format); + const uint32_t dst_block_h = + vk_format_get_blockheight(dst->planes[dst_plane].vk_format); const float block_scale_w = (float)src_block_w / (float)dst_block_w; const float block_scale_h = (float)src_block_h / (float)dst_block_h; @@ -1083,10 +1143,10 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, * divisors for the width and height depending on the source image's * bpp. */ - assert(src->cpp == dst->cpp); + assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp); format = VK_FORMAT_R32G32_UINT; - switch (src->cpp) { + switch (src->planes[src_plane].cpp) { case 16: format = VK_FORMAT_R32G32B32A32_UINT; break; @@ -1111,13 +1171,15 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, dst = create_image_alias(cmd_buffer, dst, dst_scale_w, dst_scale_h, format); } else { - format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ? - src->vk.format : get_compatible_tlb_format(src->vk.format); + format = src->format->planes[src_plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ? 
+ src->planes[src_plane].vk_format : + get_compatible_tlb_format(src->planes[src_plane].vk_format); if (format == VK_FORMAT_UNDEFINED) return false; const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format); - if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO) + assert(f->plane_count < 2); + if (!f->plane_count || f->planes[0].tex_type == TEXTURE_DATA_FORMAT_NO) return false; } @@ -1199,8 +1261,13 @@ copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer, assert(region->dstSubresource.baseArrayLayer == 0); assert(region->dstSubresource.layerCount == 1); - const uint32_t bpp = src->cpp; - assert(src->cpp == dst->cpp); + uint8_t src_plane = + v3dv_plane_from_aspect(region->srcSubresource.aspectMask); + uint8_t dst_plane = + v3dv_plane_from_aspect(region->dstSubresource.aspectMask); + + assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp); + const uint32_t bpp = src->planes[src_plane].cpp; VkFormat format; switch (bpp) { @@ -1231,9 +1298,8 @@ copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer, .a = VK_COMPONENT_SWIZZLE_IDENTITY, }; - const uint32_t buf_stride = src->slices[0].stride; + const uint32_t buf_stride = src->planes[src_plane].slices[0].stride; const VkDeviceSize buf_offset = - v3dv_layer_offset(src, 0, 0) + region->srcOffset.y * buf_stride + region->srcOffset.x * bpp; struct v3dv_buffer src_buffer; @@ -1242,18 +1308,19 @@ copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer, const struct VkBufferCreateInfo buf_create_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = src->size, + .size = src->planes[src_plane].size, .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; v3dv_buffer_init(cmd_buffer->device, &buf_create_info, &src_buffer, - src->alignment); + src->planes[src_plane].alignment); const VkBindBufferMemoryInfo buf_bind_info = { .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .buffer = 
v3dv_buffer_to_handle(&src_buffer), - .memory = v3dv_device_memory_to_handle(src->mem), - .memoryOffset = src->mem_offset, + .memory = v3dv_device_memory_to_handle(src->planes[src_plane].mem), + .memoryOffset = src->planes[src_plane].mem_offset + + v3dv_layer_offset(src, 0, 0, src_plane), }; v3dv_buffer_bind_memory(&buf_bind_info); @@ -1269,12 +1336,12 @@ copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer, }; return texel_buffer_shader_copy(cmd_buffer, - VK_IMAGE_ASPECT_COLOR_BIT, + region->dstSubresource.aspectMask, dst, format, format, &src_buffer, - src->cpp, + src->planes[src_plane].cpp, 0 /* color mask: full */, &ident_swizzle, 1, ©_region); } @@ -1293,13 +1360,14 @@ v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, cmd_buffer->state.is_transfer = true; for (uint32_t i = 0; i < info->regionCount; i++) { - if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i])) + const VkImageCopy2 *region = &info->pRegions[i]; + if (copy_image_tfu(cmd_buffer, dst, src, region)) continue; - if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i])) + if (copy_image_tlb(cmd_buffer, dst, src, region)) continue; - if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i])) + if (copy_image_blit(cmd_buffer, dst, src, region)) continue; - if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, &info->pRegions[i])) + if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, region)) continue; unreachable("Image copy not supported"); } @@ -1461,12 +1529,18 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, else height = region->bufferImageHeight; - if (width != image->vk.extent.width || height != image->vk.extent.height) + uint8_t plane = + v3dv_plane_from_aspect(region->imageSubresource.aspectMask); + + if (width != image->planes[plane].width || + height != image->planes[plane].height) return false; /* Handle region semantics for compressed images */ - const uint32_t block_w = vk_format_get_blockwidth(image->vk.format); - const uint32_t 
block_h = vk_format_get_blockheight(image->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(image->planes[plane].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(image->planes[plane].vk_format); width = DIV_ROUND_UP(width, block_w); height = DIV_ROUND_UP(height, block_h); @@ -1477,10 +1551,13 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, */ const struct v3dv_format *format = v3dv_get_compatible_tfu_format(cmd_buffer->device, - image->cpp, NULL); + image->planes[plane].cpp, NULL); + /* We only use single-plane formats with the TFU */ + assert(format->plane_count == 1); + const struct v3dv_format_plane *format_plane = &format->planes[0]; const uint32_t mip_level = region->imageSubresource.mipLevel; - const struct v3d_resource_slice *slice = &image->slices[mip_level]; + const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level]; uint32_t num_layers; if (image->vk.image_type != VK_IMAGE_TYPE_3D) @@ -1489,14 +1566,14 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, num_layers = region->imageExtent.depth; assert(num_layers > 0); - assert(image->mem && image->mem->bo); - const struct v3dv_bo *dst_bo = image->mem->bo; + assert(image->planes[plane].mem && image->planes[plane].mem->bo); + const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo; assert(buffer->mem && buffer->mem->bo); const struct v3dv_bo *src_bo = buffer->mem->bo; /* Emit a TFU job per layer to copy */ - const uint32_t buffer_stride = width * image->cpp; + const uint32_t buffer_stride = width * image->planes[plane].cpp; for (int i = 0; i < num_layers; i++) { uint32_t layer; if (image->vk.image_type != VK_IMAGE_TYPE_3D) @@ -1510,7 +1587,7 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, const uint32_t src_offset = src_bo->offset + buffer_offset; const uint32_t dst_offset = - dst_bo->offset + v3dv_layer_offset(image, mip_level, layer); + dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, 
plane); v3dv_X(cmd_buffer->device, meta_emit_tfu_job)( cmd_buffer, @@ -1518,13 +1595,13 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, dst_offset, slice->tiling, slice->padded_height, - image->cpp, + image->planes[plane].cpp, src_bo->handle, src_offset, V3D_TILING_RASTER, width, 1, - width, height, format); + width, height, format_plane); } return true; @@ -1541,7 +1618,10 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, const VkBufferImageCopy2 *region) { VkFormat fb_format; - if (!v3dv_meta_can_use_tlb(image, ®ion->imageOffset, &fb_format)) + uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask); + assert(plane < image->plane_count); + + if (!v3dv_meta_can_use_tlb(image, plane, ®ion->imageOffset, &fb_format)) return false; uint32_t internal_type, internal_bpp; @@ -1562,8 +1642,10 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, return true; /* Handle copy to compressed format using a compatible format */ - const uint32_t block_w = vk_format_get_blockwidth(image->vk.format); - const uint32_t block_h = vk_format_get_blockheight(image->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(image->planes[plane].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(image->planes[plane].vk_format); const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); @@ -2230,8 +2312,10 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, * For 3D images, this creates a layered framebuffer with a number of * layers matching the depth extent of the 3D image. 
*/ - uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel); - uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel); + uint8_t plane = v3dv_plane_from_aspect(aspect); + uint32_t fb_width = u_minify(image->planes[plane].width, resource->mipLevel); + uint32_t fb_height = u_minify(image->planes[plane].height, resource->mipLevel); + VkImageViewCreateInfo image_view_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = v3dv_image_to_handle(image), @@ -2489,8 +2573,13 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, */ assert(num_layers == 1 || region_count == 1); - const uint32_t block_width = vk_format_get_blockwidth(image->vk.format); - const uint32_t block_height = vk_format_get_blockheight(image->vk.format); + uint8_t plane = v3dv_plane_from_aspect(aspect); + assert(plane < image->plane_count); + + const uint32_t block_width = + vk_format_get_blockwidth(image->planes[plane].vk_format); + const uint32_t block_height = + vk_format_get_blockheight(image->planes[plane].vk_format); /* Copy regions by uploading each region to a temporary tiled image using * the memory we have just allocated as storage. @@ -2547,6 +2636,13 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, if (result != VK_SUCCESS) return handled; + /* When copying a multi-plane image the aspect indicates the plane to + * copy. For these, we only copy one plane at a time, which is always + * a color plane. + */ + VkImageAspectFlags copy_aspect = + image->plane_count == 1 ? 
aspect : VK_IMAGE_ASPECT_COLOR_BIT; + /* Upload buffer contents for the selected layer */ const VkDeviceSize buf_offset_bytes = region->bufferOffset + i * buf_height * buf_width * buffer_bpp; @@ -2556,7 +2652,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, .bufferRowLength = region->bufferRowLength / block_width, .bufferImageHeight = region->bufferImageHeight / block_height, .imageSubresource = { - .aspectMask = aspect, + .aspectMask = copy_aspect, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1, @@ -2588,7 +2684,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, const VkImageBlit2 blit_region = { .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2, .srcSubresource = { - .aspectMask = aspect, + .aspectMask = copy_aspect, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1, @@ -2652,12 +2748,20 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, * the same aspect. */ VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask; + const VkImageAspectFlagBits any_plane_aspect = + VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT; + + bool is_plane_aspect = aspect & any_plane_aspect; /* Generally, the bpp of the data in the buffer matches that of the * destination image. The exception is the case where we are uploading * stencil (8bpp) to a combined d24s8 image (32bpp). */ - uint32_t buf_bpp = image->cpp; + uint8_t plane = v3dv_plane_from_aspect(aspect); + assert(plane < image->plane_count); + uint32_t buf_bpp = image->planes[plane].cpp; /* We are about to upload the buffer data to an image so we can then * blit that to our destination region. 
Because we are going to implement @@ -2690,6 +2794,9 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, case 4: switch (aspect) { case VK_IMAGE_ASPECT_COLOR_BIT: + case VK_IMAGE_ASPECT_PLANE_0_BIT: + case VK_IMAGE_ASPECT_PLANE_1_BIT: + case VK_IMAGE_ASPECT_PLANE_2_BIT: src_format = VK_FORMAT_R8G8B8A8_UINT; dst_format = src_format; break; @@ -2736,12 +2843,13 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, break; case 2: assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || - aspect == VK_IMAGE_ASPECT_DEPTH_BIT); + aspect == VK_IMAGE_ASPECT_DEPTH_BIT || + is_plane_aspect); src_format = VK_FORMAT_R16_UINT; dst_format = src_format; break; case 1: - assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); + assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || is_plane_aspect); src_format = VK_FORMAT_R8_UINT; dst_format = src_format; break; @@ -2796,7 +2904,10 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, else buffer_height = region->bufferImageHeight; - uint32_t buffer_stride = buffer_width * image->cpp; + uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask); + assert(plane < image->plane_count); + + uint32_t buffer_stride = buffer_width * image->planes[plane].cpp; uint32_t buffer_layer_stride = buffer_stride * buffer_height; uint32_t num_layers; @@ -2825,6 +2936,7 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, job->cpu.copy_buffer_to_image.base_layer = region->imageSubresource.baseArrayLayer; job->cpu.copy_buffer_to_image.layer_count = num_layers; + job->cpu.copy_buffer_to_image.plane = plane; list_addtail(&job->list_link, &cmd_buffer->jobs); @@ -2891,8 +3003,7 @@ v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, * slow it might not be worth it and we should instead put more effort * in handling more cases with the other paths. 
*/ - if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, - &info->pRegions[r])) { + if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &info->pRegions[r])) { batch_size = 1; goto handled; } @@ -2932,6 +3043,15 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT); assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT); + /* From vkCmdBlitImage: + * "srcImage must not use a format that requires a sampler YCBCR + * conversion" + * "dstImage must not use a format that requires a sampler YCBCR + * conversion" + */ + assert(dst->plane_count == 1); + assert(src->plane_count == 1); + /* Format must match */ if (src->vk.format != dst->vk.format) return false; @@ -2979,7 +3099,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, */ const struct v3dv_format *format = v3dv_get_compatible_tfu_format(cmd_buffer->device, - dst->cpp, NULL); + dst->planes[0].cpp, NULL); /* Emit a TFU job for each layer to blit */ assert(region->dstSubresource.layerCount == @@ -3027,27 +3147,29 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, src_mirror_z ? 
max_src_layer - i - 1: min_src_layer + i; const uint32_t dst_offset = - dst->mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); + dst->planes[0].mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level, + dst_layer, 0); const uint32_t src_offset = - src->mem->bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); + src->planes[0].mem->bo->offset + v3dv_layer_offset(src, src_mip_level, + src_layer, 0); - const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; - const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; + const struct v3d_resource_slice *dst_slice = &dst->planes[0].slices[dst_mip_level]; + const struct v3d_resource_slice *src_slice = &src->planes[0].slices[src_mip_level]; v3dv_X(cmd_buffer->device, meta_emit_tfu_job)( cmd_buffer, - dst->mem->bo->handle, + dst->planes[0].mem->bo->handle, dst_offset, dst_slice->tiling, dst_slice->padded_height, - dst->cpp, - src->mem->bo->handle, + dst->planes[0].cpp, + src->planes[0].mem->bo->handle, src_offset, src_slice->tiling, src_slice->tiling == V3D_TILING_RASTER ? src_slice->stride : src_slice->padded_height, - src->cpp, - dst_width, dst_height, format); + src->planes[0].cpp, + dst_width, dst_height, &format->planes[0]); } return true; @@ -3904,6 +4026,8 @@ allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer, * cmask parameter (which can be 0 to default to all channels), as well as a * swizzle to apply to the source via the cswizzle parameter (which can be NULL * to use the default identity swizzle). + * + * Supports multi-plane formats too. 
*/ static bool blit_shader(struct v3dv_cmd_buffer *cmd_buffer, @@ -3962,6 +4086,13 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, src_format = dst_format; } + uint8_t src_plane = + v3dv_plane_from_aspect(region->srcSubresource.aspectMask); + assert(src_plane < src->plane_count); + uint8_t dst_plane = + v3dv_plane_from_aspect(region->dstSubresource.aspectMask); + assert(dst_plane < dst->plane_count); + const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | @@ -3984,10 +4115,14 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, * need to apply those same semantics here when we compute the size of the * destination image level. */ - const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format); - const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format); - const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format); - const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format); + const uint32_t dst_block_w = + vk_format_get_blockwidth(dst->planes[dst_plane].vk_format); + const uint32_t dst_block_h = + vk_format_get_blockheight(dst->planes[dst_plane].vk_format); + const uint32_t src_block_w = + vk_format_get_blockwidth(src->planes[src_plane].vk_format); + const uint32_t src_block_h = + vk_format_get_blockheight(src->planes[src_plane].vk_format); const uint32_t dst_level_w = u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w), region->dstSubresource.mipLevel); @@ -3996,9 +4131,11 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, region->dstSubresource.mipLevel); const uint32_t src_level_w = - u_minify(src->vk.extent.width, region->srcSubresource.mipLevel); + u_minify(src->planes[src_plane].width, region->srcSubresource.mipLevel); const uint32_t src_level_h = - u_minify(src->vk.extent.height, region->srcSubresource.mipLevel); + u_minify(src->planes[src_plane].height, region->srcSubresource.mipLevel); + + assert(src->plane_count == 1 || 
src->vk.image_type != VK_IMAGE_TYPE_3D); const uint32_t src_level_d = u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel); @@ -4335,7 +4472,16 @@ v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage); V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage); - /* This command can only happen outside a render pass */ + /* From vkCmdBlitImage: + * "srcImage must not use a format that requires a sampler YCBCR + * conversion" + * "dstImage must not use a format that requires a sampler YCBCR + * conversion" + */ + assert(src->plane_count == 1); + assert(dst->plane_count == 1); + + /* This command can only happen outside a render pass */ assert(cmd_buffer->state.pass == NULL); assert(cmd_buffer->state.job == NULL); @@ -4349,13 +4495,15 @@ v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, cmd_buffer->state.is_transfer = true; for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) { - if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i])) + const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i]; + + if (blit_tfu(cmd_buffer, dst, src, region)) continue; if (blit_shader(cmd_buffer, dst, dst->vk.format, src, src->vk.format, 0, NULL, - &pBlitImageInfo->pRegions[i], + region, pBlitImageInfo->filter, true)) { continue; } @@ -4371,8 +4519,12 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *src, const VkImageResolve2 *region) { - if (!v3dv_meta_can_use_tlb(src, ®ion->srcOffset, NULL) || - !v3dv_meta_can_use_tlb(dst, ®ion->dstOffset, NULL)) { + /* No resolve for multi-planar images. 
Using plane 0 */ + assert(dst->plane_count == 1); + assert(src->plane_count == 1); + + if (!v3dv_meta_can_use_tlb(src, 0, ®ion->srcOffset, NULL) || + !v3dv_meta_can_use_tlb(dst, 0, ®ion->dstOffset, NULL)) { return false; } @@ -4393,8 +4545,10 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, if (!job) return true; - const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format); - const uint32_t block_h = vk_format_get_blockheight(dst->vk.format); + const uint32_t block_w = + vk_format_get_blockwidth(dst->planes[0].vk_format); + const uint32_t block_h = + vk_format_get_blockheight(dst->planes[0].vk_format); const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); @@ -4466,6 +4620,10 @@ v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT); assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT); + /* We don't support multi-sampled multi-plane images */ + assert(src->plane_count == 1); + assert(dst->plane_count == 1); + cmd_buffer->state.is_transfer = true; for (uint32_t i = 0; i < info->regionCount; i++) { diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c index bf70324..683acde 100644 --- a/src/broadcom/vulkan/v3dv_pass.c +++ b/src/broadcom/vulkan/v3dv_pass.c @@ -332,8 +332,10 @@ subpass_get_granularity(struct v3dv_device *device, &pass->attachments[attachment_idx].desc; const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format); uint32_t internal_type, internal_bpp; + /* We don't support rendering to YCbCr images */ + assert(format->plane_count == 1); v3dv_X(device, get_internal_type_bpp_for_output_format) - (format->rt_type, &internal_type, &internal_bpp); + (format->planes[0].rt_type, &internal_type, &internal_bpp); max_bpp = MAX2(max_bpp, internal_bpp); diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 043019c..ca70116 100644 --- 
a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -30,6 +30,7 @@ #include "qpu/qpu_disasm.h" #include "compiler/nir/nir_builder.h" +#include "nir/nir_vulkan.h" #include "nir/nir_serialize.h" #include "util/u_atomic.h" @@ -245,6 +246,31 @@ v3dv_pipeline_get_nir_options(void) return &v3dv_nir_options; } +static const struct vk_ycbcr_conversion * +lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set, + uint32_t binding, uint32_t array_index) +{ + struct v3dv_pipeline_layout *pipeline_layout = + (struct v3dv_pipeline_layout *) _pipeline_layout; + + assert(set < pipeline_layout->num_sets); + struct v3dv_descriptor_set_layout *set_layout = + pipeline_layout->set[set].layout; + + assert(binding < set_layout->binding_count); + struct v3dv_descriptor_set_binding_layout *bind_layout = + &set_layout->binding[binding]; + + if (bind_layout->immutable_samplers_offset) { + const struct v3dv_sampler *immutable_samplers = + v3dv_immutable_samplers(set_layout, bind_layout); + const struct v3dv_sampler *sampler = &immutable_samplers[array_index]; + return sampler->conversion; + } else { + return NULL; + } +} + static void preprocess_nir(nir_shader *nir) { @@ -381,7 +407,8 @@ descriptor_map_add(struct v3dv_descriptor_map *map, int array_index, int array_size, int start_index, - uint8_t return_size) + uint8_t return_size, + uint8_t plane) { assert(array_index < array_size); assert(return_size == 16 || return_size == 32); @@ -391,7 +418,8 @@ descriptor_map_add(struct v3dv_descriptor_map *map, if (map->used[index] && set == map->set[index] && binding == map->binding[index] && - array_index == map->array_index[index]) { + array_index == map->array_index[index] && + plane == map->plane[index]) { assert(array_size == map->array_size[index]); if (return_size != map->return_size[index]) { /* It the return_size is different it means that the same sampler @@ -416,6 +444,7 @@ descriptor_map_add(struct v3dv_descriptor_map *map, 
map->array_index[index] = array_index; map->array_size[index] = array_size; map->return_size[index] = return_size; + map->plane[index] = plane; map->num_desc = MAX2(map->num_desc, index + 1); return index; @@ -523,7 +552,8 @@ lower_vulkan_resource_index(nir_builder *b, const_val->u32, binding_layout->array_size, start_index, - 32 /* return_size: doesn't really apply for this case */); + 32 /* return_size: doesn't really apply for this case */, + 0); /* We always reserve index 0 for push constants */ if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || @@ -549,6 +579,18 @@ lower_vulkan_resource_index(nir_builder *b, nir_instr_remove(&instr->instr); } +static uint8_t +tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) +{ + int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane); + if (plane_src_idx < 0) + return 0; + + uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src); + nir_tex_instr_remove_src(tex, plane_src_idx); + return plane; +} + /* Returns return_size, so it could be used for the case of not having a * sampler object */ @@ -564,6 +606,8 @@ lower_tex_src(nir_builder *b, nir_tex_src *src = &instr->src[src_idx]; bool is_sampler = src->src_type == nir_tex_src_sampler_deref; + uint8_t plane = tex_instr_get_and_remove_plane_src(instr); + /* We compute first the offsets */ nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); while (deref->deref_type != nir_deref_type_var) { @@ -648,7 +692,8 @@ lower_tex_src(nir_builder *b, array_index, binding_layout->array_size, 0, - return_size); + return_size, + plane); if (is_sampler) instr->sampler_index = desc_index; @@ -755,7 +800,8 @@ lower_image_deref(nir_builder *b, array_index, binding_layout->array_size, 0, - 32 /* return_size: doesn't apply for textures */); + 32 /* return_size: doesn't apply for textures */, + 0); /* Note: we don't need to do anything here in relation to the precision and * the output size because for images we can infer that info from the 
image @@ -1107,11 +1153,16 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, /* If logic operations are enabled then we might emit color reads and we * need to know the color buffer format and swizzle for that + * */ if (key->logicop_func != PIPE_LOGICOP_COPY) { + /* Framebuffer formats should be single plane */ + assert(vk_format_get_plane_count(fb_format) == 1); key->color_fmt[i].format = fb_pipe_format; memcpy(key->color_fmt[i].swizzle, - v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format), + v3dv_get_format_swizzle(p_stage->pipeline->device, + fb_format, + 0), sizeof(key->color_fmt[i].swizzle)); } @@ -1665,6 +1716,9 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline, assert(pipeline->shared_data && pipeline->shared_data->maps[p_stage->stage]); + NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex, + lookup_ycbcr_conversion, layout); + nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); /* We add this because we need a valid sampler for nir_lower_tex to do @@ -1678,10 +1732,10 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline, pipeline->shared_data->maps[p_stage->stage]; UNUSED unsigned index; - index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16); + index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0); assert(index == V3DV_NO_SAMPLER_16BIT_IDX); - index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32); + index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0); assert(index == V3DV_NO_SAMPLER_32BIT_IDX); /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ @@ -1921,9 +1975,11 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, * need to know the color buffer format and swizzle for that */ if (key->logicop_func != PIPE_LOGICOP_COPY) { + /* Framebuffer formats should be single plane */ + assert(vk_format_get_plane_count(fb_format) == 1); key->color_fmt[i].format = fb_pipe_format; memcpy(key->color_fmt[i].swizzle, - 
v3dv_get_format_swizzle(pipeline->device, fb_format), + v3dv_get_format_swizzle(pipeline->device, fb_format, 0), sizeof(key->color_fmt[i].swizzle)); } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 0628d43..f36fa4d 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -46,6 +46,7 @@ #include "vk_sync.h" #include "vk_sync_timeline.h" #include "vk_util.h" +#include "vk_ycbcr_conversion.h" #include "vk_command_buffer.h" #include "vk_command_pool.h" @@ -220,6 +221,7 @@ void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device); void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device); bool v3dv_meta_can_use_tlb(struct v3dv_image *image, + uint8_t plane, const VkOffset3D *offset, VkFormat *compat_format); @@ -605,9 +607,8 @@ struct v3dv_device_memory { #define V3D_OUTPUT_IMAGE_FORMAT_NO 255 #define TEXTURE_DATA_FORMAT_NO 255 -struct v3dv_format { - bool supported; - +#define V3DV_MAX_PLANE_COUNT 3 +struct v3dv_format_plane { /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ uint8_t rt_type; @@ -623,11 +624,45 @@ struct v3dv_format { /* Whether the return value is 16F/I/UI or 32F/I/UI. */ uint8_t return_size; +}; + +struct v3dv_format { + /* Non 0 plane count implies supported */ + uint8_t plane_count; + + struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT]; /* If the format supports (linear) filtering when texturing. 
*/ bool supports_filtering; }; +/* Note that although VkImageAspectFlags would allow to combine more than one + * PLANE bit, for all the use cases we implement that use VkImageAspectFlags, + * only one plane is allowed, like for example vkCmdCopyImage: + * + * "If srcImage has a VkFormat with two planes then for each element of + * pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT + * or VK_IMAGE_ASPECT_PLANE_1_BIT" + * + */ +static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect) +{ + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + case VK_IMAGE_ASPECT_DEPTH_BIT: + case VK_IMAGE_ASPECT_STENCIL_BIT: + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + case VK_IMAGE_ASPECT_PLANE_0_BIT: + return 0; + case VK_IMAGE_ASPECT_PLANE_1_BIT: + return 1; + case VK_IMAGE_ASPECT_PLANE_2_BIT: + return 2; + default: + unreachable("invalid image aspect"); + } +} + struct v3d_resource_slice { uint32_t offset; uint32_t stride; @@ -649,16 +684,42 @@ struct v3dv_image { struct vk_image vk; const struct v3dv_format *format; - uint32_t cpp; bool tiled; - struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; - uint64_t size; /* Total size in bytes */ - uint32_t cube_map_stride; + uint8_t plane_count; - struct v3dv_device_memory *mem; - VkDeviceSize mem_offset; - uint32_t alignment; + /* If 0, this is a multi-plane image that uses disjoint memory, where each + * plane binds a different device memory. Otherwise, all the planes share + * the same device memory and this stores the total size of the image in + * bytes. + */ + uint32_t non_disjoint_size; + + struct { + uint32_t cpp; + + struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; + /* Total size of the plane in bytes. */ + uint64_t size; + uint32_t cube_map_stride; + + /* If not using disjoint memory, mem and mem_offset are the same for all + * planes, in which case mem_offset is the offset of plane 0. 
+ */ + struct v3dv_device_memory *mem; + VkDeviceSize mem_offset; + uint32_t alignment; + + /* Pre-subsampled per plane width and height + */ + uint32_t width; + uint32_t height; + + /* Even if we can get it from the parent image format, we keep the + * format here for convenience + */ + VkFormat vk_format; + } planes[V3DV_MAX_PLANE_COUNT]; #ifdef ANDROID /* Image is backed by VK_ANDROID_native_buffer, */ @@ -674,6 +735,18 @@ v3dv_image_init(struct v3dv_device *device, VkImageViewType v3dv_image_type_to_view_type(VkImageType type); +static uint32_t +v3dv_image_aspect_to_plane(const struct v3dv_image *image, + VkImageAspectFlagBits aspect) +{ + assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects)); + + /* Because we always put image and view planes in aspect-bit-order, the + * plane index is the number of bits in the image aspect before aspect. + */ + return util_bitcount(image->vk.aspects & (aspect - 1)); +} + /* Pre-generating packets needs to consider changes in packet sizes across hw * versions. Keep things simple and allocate enough space for any supported * version. We ensure the size is large enough through static asserts. @@ -691,36 +764,43 @@ struct v3dv_image_view { struct vk_image_view vk; const struct v3dv_format *format; - bool swap_rb; - bool channel_reverse; - uint32_t internal_bpp; - uint32_t internal_type; - uint32_t offset; - /* Precomputed (composed from createinfo->components and formar swizzle) - * swizzles to pass in to the shader key. - * - * This could be also included on the descriptor bo, but the shader state - * packet doesn't need it on a bo, so we can just avoid a memory copy - */ - uint8_t swizzle[4]; + uint8_t plane_count; + struct { + uint8_t image_plane; - /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info - * during UpdateDescriptorSets. 
- * - * Empirical tests show that cube arrays need a different shader state - * depending on whether they are used with a sampler or not, so for these - * we generate two states and select the one to use based on the descriptor - * type. - */ - uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH]; + bool swap_rb; + bool channel_reverse; + uint32_t internal_bpp; + uint32_t internal_type; + uint32_t offset; + + /* Precomputed (composed from createinfo->components and formar swizzle) + * swizzles to pass in to the shader key. + * + * This could be also included on the descriptor bo, but the shader state + * packet doesn't need it on a bo, so we can just avoid a memory copy + */ + uint8_t swizzle[4]; + + /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info + * during UpdateDescriptorSets. + * + * Empirical tests show that cube arrays need a different shader state + * depending on whether they are used with a sampler or not, so for these + * we generate two states and select the one to use based on the descriptor + * type. + */ + uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH]; + } planes[V3DV_MAX_PLANE_COUNT]; }; VkResult v3dv_create_image_view(struct v3dv_device *device, const VkImageViewCreateInfo *pCreateInfo, VkImageView *pView); -uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer); +uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer, + uint8_t plane); struct v3dv_buffer { struct vk_object_base base; @@ -1088,6 +1168,7 @@ struct v3dv_copy_buffer_to_image_cpu_job_info { uint32_t mip_level; uint32_t base_layer; uint32_t layer_count; + uint8_t plane; }; struct v3dv_csd_indirect_cpu_job_info { @@ -1904,6 +1985,11 @@ struct v3dv_descriptor_set_binding_layout { * if there are no immutable samplers. */ uint32_t immutable_samplers_offset; + + /* Descriptors for multiplanar combined image samplers are larger. 
+ * For mutable descriptors, this is always 1. + */ + uint8_t plane_stride; }; struct v3dv_descriptor_set_layout { @@ -2021,6 +2107,7 @@ struct v3dv_descriptor_map { int binding[DESCRIPTOR_MAP_SIZE]; int array_index[DESCRIPTOR_MAP_SIZE]; int array_size[DESCRIPTOR_MAP_SIZE]; + uint8_t plane[DESCRIPTOR_MAP_SIZE]; bool used[DESCRIPTOR_MAP_SIZE]; /* NOTE: the following is only for sampler, but this is the easier place to @@ -2031,15 +2118,17 @@ struct v3dv_descriptor_map { struct v3dv_sampler { struct vk_object_base base; + struct vk_ycbcr_conversion *conversion; bool compare_enable; bool unnormalized_coordinates; bool clamp_to_transparent_black_border; - /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu + /* Prepacked per plane SAMPLER_STATE, that is referenced as part of the tmu * configuration. If needed it will be copied to the descriptor info during * UpdateDescriptorSets */ + uint8_t plane_count; uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH]; }; @@ -2301,7 +2390,8 @@ uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev); #define v3dv_debug_ignored_stype(sType) \ mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType)) -const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f); +const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f, + uint8_t plane); const struct v3dv_format * v3dv_get_compatible_tfu_format(struct v3dv_device *device, uint32_t bpp, VkFormat *out_vk_format); diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c index 97f3b7b..82dca35 100644 --- a/src/broadcom/vulkan/v3dv_queue.c +++ b/src/broadcom/vulkan/v3dv_queue.c @@ -312,7 +312,7 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue, return result; /* Map BOs */ - struct v3dv_bo *dst_bo = info->image->mem->bo; + struct v3dv_bo *dst_bo = info->image->planes[info->plane].mem->bo; assert(!dst_bo->map || dst_bo->map_size == 
dst_bo->size); if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size)) return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -325,7 +325,7 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue, void *src_ptr = src_bo->map; const struct v3d_resource_slice *slice = - &info->image->slices[info->mip_level]; + &info->image->planes[info->plane].slices[info->mip_level]; const struct pipe_box box = { info->image_offset.x, info->image_offset.y, info->base_layer, @@ -335,14 +335,15 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue, /* Copy each layer */ for (uint32_t i = 0; i < info->layer_count; i++) { const uint32_t dst_offset = - v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i); + v3dv_layer_offset(info->image, info->mip_level, + info->base_layer + i, info->plane); const uint32_t src_offset = info->buffer->mem_offset + info->buffer_offset + info->buffer_layer_stride * i; v3d_store_tiled_image( dst_ptr + dst_offset, slice->stride, src_ptr + src_offset, info->buffer_stride, - slice->tiling, info->image->cpp, slice->padded_height, &box); + slice->tiling, info->image->planes[info->plane].cpp, slice->padded_height, &box); } return VK_SUCCESS; diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c index 8ad5ef3..63c75e9 100644 --- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c @@ -131,17 +131,29 @@ cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, uint32_t buffer) { const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + + /* We don't support rendering to ycbcr images, so the image view should be + * single-plane, and using a single-plane format. But note that the underlying + * image can be a ycbcr format, as we support rendering to a specific plane + * of an image. This is used for example on some meta_copy code paths, in + * order to copy from/to a plane of a ycbcr image. 
+ */ + assert(iview->plane_count == 1); + assert(iview->format->plane_count == 1); + + uint8_t image_plane = v3dv_plane_from_aspect(iview->vk.aspects); const struct v3d_resource_slice *slice = - &image->slices[iview->vk.base_mip_level]; + &image->planes[image_plane].slices[iview->vk.base_mip_level]; + uint32_t layer_offset = v3dv_layer_offset(image, iview->vk.base_mip_level, - iview->vk.base_array_layer + layer); + iview->vk.base_array_layer + layer, image_plane); cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { load.buffer_to_load = buffer; - load.address = v3dv_cl_address(image->mem->bo, layer_offset); + load.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset); - load.input_image_format = iview->format->rt_type; + load.input_image_format = iview->format->planes[0].rt_type; /* If we create an image view with only the stencil format, we * re-interpret the format as RGBA8_UINT, as it is want we want in @@ -151,13 +163,13 @@ cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, * buffer, we need to use the underlying DS format. 
*/ if (buffer == ZSTENCIL && - iview->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { - assert(image->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); - load.input_image_format = image->format->rt_type; + iview->format->planes[0].rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { + assert(image->format->planes[image_plane].rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); + load.input_image_format = image->format->planes[image_plane].rt_type; } - load.r_b_swap = iview->swap_rb; - load.channel_reverse = iview->channel_reverse; + load.r_b_swap = iview->planes[0].swap_rb; + load.channel_reverse = iview->planes[0].channel_reverse; load.memory_format = slice->tiling; if (slice->tiling == V3D_TILING_UIF_NO_XOR || @@ -315,18 +327,30 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, const struct v3dv_image_view *iview = cmd_buffer->state.attachments[attachment_idx].image_view; const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + + /* We don't support rendering to ycbcr images, so the image view should be + * one-plane, and using a single-plane format. But note that the underlying + * image can be a ycbcr format, as we support rendering to a specific plane + * of an image. This is used for example on some meta_copy code paths, in + * order to copy from/to a plane of a ycbcr image. 
+ */ + assert(iview->plane_count == 1); + assert(iview->format->plane_count == 1); + + uint8_t image_plane = v3dv_plane_from_aspect(iview->vk.aspects); const struct v3d_resource_slice *slice = - &image->slices[iview->vk.base_mip_level]; + &image->planes[image_plane].slices[iview->vk.base_mip_level]; uint32_t layer_offset = v3dv_layer_offset(image, iview->vk.base_mip_level, - iview->vk.base_array_layer + layer); + iview->vk.base_array_layer + layer, + image_plane); cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { store.buffer_to_store = buffer; - store.address = v3dv_cl_address(image->mem->bo, layer_offset); + store.address = v3dv_cl_address(image->planes[image_plane].mem->bo, layer_offset); store.clear_buffer_being_stored = clear; - store.output_image_format = iview->format->rt_type; + store.output_image_format = iview->format->planes[0].rt_type; /* If we create an image view with only the stencil format, we * re-interpret the format as RGBA8_UINT, as it is want we want in @@ -336,13 +360,13 @@ cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, * buffer, we need to use the underlying DS format. 
*/ if (buffer == ZSTENCIL && - iview->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { - assert(image->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); - store.output_image_format = image->format->rt_type; + iview->format->planes[0].rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { + assert(image->format->planes[image_plane].rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); + store.output_image_format = image->format->planes[image_plane].rt_type; } - store.r_b_swap = iview->swap_rb; - store.channel_reverse = iview->channel_reverse; + store.r_b_swap = iview->planes[0].swap_rb; + store.channel_reverse = iview->planes[0].channel_reverse; store.memory_format = slice->tiling; if (slice->tiling == V3D_TILING_UIF_NO_XOR || @@ -805,7 +829,14 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { const struct v3dv_image_view *iview = state->attachments[ds_attachment_idx].image_view; - config.internal_depth_type = iview->internal_type; + + /* At this point the image view should be single-plane. But note that + * the underlying image can be multi-plane, and the image view refer + * to one specific plane. 
+ */ + assert(iview->plane_count == 1); + assert(iview->format->plane_count == 1); + config.internal_depth_type = iview->planes[0].internal_type; set_rcl_early_z_config(job, &config.early_z_disable, @@ -881,10 +912,13 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view; + assert(iview->plane_count == 1); const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + + uint8_t plane = v3dv_plane_from_aspect(iview->vk.aspects); const struct v3d_resource_slice *slice = - &image->slices[iview->vk.base_mip_level]; + &image->planes[plane].slices[iview->vk.base_mip_level]; const uint32_t *clear_color = &state->attachments[attachment_idx].clear_value.color[0]; @@ -892,7 +926,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) uint32_t clear_pad = 0; if (slice->tiling == V3D_TILING_UIF_NO_XOR || slice->tiling == V3D_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->cpp) * 2; + int uif_block_height = v3d_utile_height(image->planes[plane].cpp) * 2; uint32_t implicit_padded_height = align(framebuffer->height, uif_block_height) / uif_block_height; @@ -909,7 +943,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) clear.render_target_number = i; }; - if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) { + if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_64) { cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { clear.clear_color_mid_low_32_bits = ((clear_color[1] >> 24) | (clear_color[2] << 8)); @@ -919,7 +953,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) }; } - if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + if (iview->planes[0].internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { clear.uif_padded_height_in_uif_blocks = clear_pad; clear.clear_color_high_16_bits = 
clear_color[3] >> 16; @@ -2329,8 +2363,9 @@ v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buf struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view; assert(vk_format_is_color(iview->vk.format)); - *rt_bpp = iview->internal_bpp; - *rt_type = iview->internal_type; + assert(iview->plane_count == 1); + *rt_bpp = iview->planes[0].internal_bpp; + *rt_type = iview->planes[0].internal_type; if (vk_format_is_int(iview->vk.view_format)) *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; else if (vk_format_is_srgb(iview->vk.view_format)) diff --git a/src/broadcom/vulkan/v3dvx_descriptor_set.c b/src/broadcom/vulkan/v3dvx_descriptor_set.c index b3aec9e..ced7b7e 100644 --- a/src/broadcom/vulkan/v3dvx_descriptor_set.c +++ b/src/broadcom/vulkan/v3dvx_descriptor_set.c @@ -86,13 +86,15 @@ v3dX(max_descriptor_bo_size)(void) uint32_t -v3dX(combined_image_sampler_texture_state_offset)(void) +v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane) { - return 0; + return v3dX(descriptor_bo_size)(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) * + plane; } uint32_t -v3dX(combined_image_sampler_sampler_state_offset)(void) +v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane) { - return cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32); + return v3dX(combined_image_sampler_texture_state_offset)(plane) + + cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32); } diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c index 9fdb47c..cc1e8d7 100644 --- a/src/broadcom/vulkan/v3dvx_device.c +++ b/src/broadcom/vulkan/v3dvx_device.c @@ -58,6 +58,15 @@ static union pipe_color_union encode_border_color( const struct v3dv_format *format = v3dX(get_format)(bc_info->format); + /* YCbCr doesn't interact with border color at all. 
From spec: + * + * "If sampler YCBCR conversion is enabled, addressModeU, addressModeV, + * and addressModeW must be VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + * anisotropyEnable must be VK_FALSE, and unnormalizedCoordinates must + * be VK_FALSE" + */ + assert(format->plane_count == 1); + /* We use the swizzle in our format table to determine swizzle configuration * for sampling as well as to decide if we need to use the Swap R/B and * Reverse Channels bits for Tile Load/Store operations. The order of the @@ -68,19 +77,19 @@ static union pipe_color_union encode_border_color( * colors so we need to fix up the swizzle manually for this case. */ uint8_t swizzle[4]; - if (v3dv_format_swizzle_needs_reverse(format->swizzle) && - v3dv_format_swizzle_needs_rb_swap(format->swizzle)) { + if (v3dv_format_swizzle_needs_reverse(format->planes[0].swizzle) && + v3dv_format_swizzle_needs_rb_swap(format->planes[0].swizzle)) { swizzle[0] = PIPE_SWIZZLE_W; swizzle[1] = PIPE_SWIZZLE_X; swizzle[2] = PIPE_SWIZZLE_Y; swizzle[3] = PIPE_SWIZZLE_Z; } else { - memcpy(swizzle, format->swizzle, sizeof (swizzle)); + memcpy(swizzle, format->planes[0].swizzle, sizeof (swizzle)); } union pipe_color_union border; for (int i = 0; i < 4; i++) { - if (format->swizzle[i] <= 3) + if (format->planes[0].swizzle[i] <= 3) border.ui[i] = bc_info->customBorderColor.uint32[swizzle[i]]; else border.ui[i] = 0; @@ -274,9 +283,10 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( const struct v3dv_image_view *att = attachments[att_idx].image_view; assert(att); + assert(att->plane_count == 1); if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) - *max_bpp = MAX2(*max_bpp, att->internal_bpp); + *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp); if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) *msaa = true; @@ -298,9 +308,10 @@ v3dX(framebuffer_compute_internal_bpp_msaa)( for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { const struct v3dv_image_view *att = attachments[i].image_view; assert(att); + 
assert(att->plane_count == 1); if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) - *max_bpp = MAX2(*max_bpp, att->internal_bpp); + *max_bpp = MAX2(*max_bpp, att->planes[0].internal_bpp); if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) *msaa = true; diff --git a/src/broadcom/vulkan/v3dvx_formats.c b/src/broadcom/vulkan/v3dvx_formats.c index 61ecc77..45a1cf6 100644 --- a/src/broadcom/vulkan/v3dvx_formats.c +++ b/src/broadcom/vulkan/v3dvx_formats.c @@ -27,6 +27,8 @@ #include "util/format/u_format.h" #include "vulkan/util/vk_util.h" +#include "vk_enum_to_str.h" +#include "vk_enum_defines.h" #define SWIZ(x,y,z,w) { \ PIPE_SWIZZLE_##x, \ @@ -37,14 +39,33 @@ #define FORMAT(vk, rt, tex, swiz, return_size, supports_filtering) \ [VK_ENUM_OFFSET(VK_FORMAT_##vk)] = { \ - true, \ - V3D_OUTPUT_IMAGE_FORMAT_##rt, \ - TEXTURE_DATA_FORMAT_##tex, \ - swiz, \ - return_size, \ + 1, \ + {{ \ + V3D_OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + }}, \ supports_filtering, \ } +#define PLANE(rt, tex, swiz, return_size) \ + { \ + V3D_OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size \ + } + +#define YCBCR_FORMAT(vk, supports_filtering, plane_count, ...) 
\ + [VK_ENUM_OFFSET(VK_FORMAT_##vk)] = { \ + plane_count, \ + { \ + __VA_ARGS__, \ + }, \ + supports_filtering, \ + } + #define SWIZ_X001 SWIZ(X, 0, 0, 1) #define SWIZ_XY01 SWIZ(X, Y, 0, 1) #define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) @@ -220,19 +241,36 @@ static const struct v3dv_format format_table_4444[] = { FORMAT(A4R4G4B4_UNORM_PACK16_EXT, ABGR4444, RGBA4, SWIZ_YZWX, 16, true), /* Reverse + RB swap */ }; +static const struct v3dv_format format_table_ycbcr[] = { + YCBCR_FORMAT(G8_B8R8_2PLANE_420_UNORM, false, 2, + PLANE(R8, R8, SWIZ(X, 0, 0, 1), 16), + PLANE(RG8, RG8, SWIZ(X, Y, 0, 1), 16) + ), + YCBCR_FORMAT(G8_B8_R8_3PLANE_420_UNORM, false, 3, + PLANE(R8, R8, SWIZ(X, 0, 0, 1), 16), + PLANE(R8, R8, SWIZ(X, 0, 0, 1), 16), + PLANE(R8, R8, SWIZ(X, 0, 0, 1), 16) + ), +}; + const struct v3dv_format * v3dX(get_format)(VkFormat format) { /* Core formats */ - if (format < ARRAY_SIZE(format_table) && format_table[format].supported) + if (format < ARRAY_SIZE(format_table) && format_table[format].plane_count) return &format_table[format]; - switch (format) { - /* VK_EXT_4444_formats */ - case VK_FORMAT_A4R4G4B4_UNORM_PACK16: - case VK_FORMAT_A4B4G4R4_UNORM_PACK16: - return &format_table_4444[VK_ENUM_OFFSET(format)]; - + uint32_t ext_number = VK_ENUM_EXTENSION(format); + uint32_t enum_offset = VK_ENUM_OFFSET(format); + + switch (ext_number) { + case _VK_EXT_4444_formats_number: + return &format_table_4444[enum_offset]; + case _VK_KHR_sampler_ycbcr_conversion_number: + if (enum_offset < ARRAY_SIZE(format_table_ycbcr)) + return &format_table_ycbcr[enum_offset]; + else + return NULL; default: return NULL; } @@ -372,18 +410,32 @@ bool v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format) { uint32_t type, bpp; - v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp); + + /* Multiplanar images cannot be multisampled: + * + * "sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of + * the following conditions is true: (...) 
format is one of the formats + * that require a sampler Y′CBCR conversion (...)" + */ + if (!format->plane_count || format->plane_count > 1) + return false; + + v3dX(get_internal_type_bpp_for_output_format)(format->planes[0].rt_type, &type, &bpp); return type == V3D_INTERNAL_TYPE_8 || type == V3D_INTERNAL_TYPE_16F; } bool v3dX(format_supports_blending)(const struct v3dv_format *format) { + /* ycbcr formats don't support blending */ + if (!format->plane_count || format->plane_count > 1) + return false; + /* Hardware blending is only supported on render targets that are configured * 4x8-bit unorm, 2x16-bit float or 4x16-bit float. */ uint32_t type, bpp; - v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp); + v3dX(get_internal_type_bpp_for_output_format)(format->planes[0].rt_type, &type, &bpp); switch (type) { case V3D_INTERNAL_TYPE_8: return bpp == V3D_INTERNAL_BPP_32; @@ -485,7 +537,9 @@ v3dX(get_internal_type_bpp_for_image_aspects)(VkFormat vk_format, } } else { const struct v3dv_format *format = v3dX(get_format)(vk_format); - v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, + /* We only expect this to be called for single-plane formats */ + assert(format->plane_count == 1); + v3dX(get_internal_type_bpp_for_output_format)(format->planes[0].rt_type, internal_type, internal_bpp); } } diff --git a/src/broadcom/vulkan/v3dvx_image.c b/src/broadcom/vulkan/v3dvx_image.c index cbe9f3c..2da58ca 100644 --- a/src/broadcom/vulkan/v3dvx_image.c +++ b/src/broadcom/vulkan/v3dvx_image.c @@ -45,80 +45,84 @@ pack_texture_shader_state_helper(struct v3dv_device *device, image->vk.samples == VK_SAMPLE_COUNT_4_BIT); const uint32_t msaa_scale = image->vk.samples == VK_SAMPLE_COUNT_1_BIT ? 
1 : 2; - v3dvx_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) { - - tex.level_0_is_strictly_uif = - (image->slices[0].tiling == V3D_TILING_UIF_XOR || - image->slices[0].tiling == V3D_TILING_UIF_NO_XOR); - - tex.level_0_xor_enable = (image->slices[0].tiling == V3D_TILING_UIF_XOR); - - if (tex.level_0_is_strictly_uif) - tex.level_0_ub_pad = image->slices[0].ub_pad; - - /* FIXME: v3d never sets uif_xor_disable, but uses it on the following - * check so let's set the default value - */ - tex.uif_xor_disable = false; - if (tex.uif_xor_disable || - tex.level_0_is_strictly_uif) { - tex.extended = true; - } - - tex.base_level = image_view->vk.base_mip_level; - tex.max_level = image_view->vk.base_mip_level + - image_view->vk.level_count - 1; - - tex.swizzle_r = v3d_translate_pipe_swizzle(image_view->swizzle[0]); - tex.swizzle_g = v3d_translate_pipe_swizzle(image_view->swizzle[1]); - tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->swizzle[2]); - tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->swizzle[3]); - - tex.reverse_standard_border_color = image_view->channel_reverse; - - tex.texture_type = image_view->format->tex_type; - - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - tex.image_depth = image->vk.extent.depth; - } else { - tex.image_depth = image_view->vk.layer_count; - } - - /* Empirical testing with CTS shows that when we are sampling from cube - * arrays we want to set image depth to layers / 6, but not when doing - * image load/store. - */ - if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && - !for_cube_map_array_storage) { - assert(tex.image_depth % 6 == 0); - tex.image_depth /= 6; - } - - tex.image_height = image->vk.extent.height * msaa_scale; - tex.image_width = image->vk.extent.width * msaa_scale; - - /* On 4.x, the height of a 1D texture is redefined to be the - * upper 14 bits of the width (which is only usable with txf). 
- */ - if (image->vk.image_type == VK_IMAGE_TYPE_1D) { - tex.image_height = tex.image_width >> 14; + for (uint8_t plane = 0; plane < image_view->plane_count; plane++) { + uint8_t iplane = image_view->planes[plane].image_plane; + v3dvx_pack(image_view->planes[plane].texture_shader_state[index], TEXTURE_SHADER_STATE, tex) { + + tex.level_0_is_strictly_uif = + (image->planes[iplane].slices[0].tiling == V3D_TILING_UIF_XOR || + image->planes[iplane].slices[0].tiling == V3D_TILING_UIF_NO_XOR); + + tex.level_0_xor_enable = (image->planes[iplane].slices[0].tiling == V3D_TILING_UIF_XOR); + + if (tex.level_0_is_strictly_uif) + tex.level_0_ub_pad = image->planes[iplane].slices[0].ub_pad; + + /* FIXME: v3d never sets uif_xor_disable, but uses it on the following + * check so let's set the default value + */ + tex.uif_xor_disable = false; + if (tex.uif_xor_disable || + tex.level_0_is_strictly_uif) { + tex.extended = true; + } + + tex.base_level = image_view->vk.base_mip_level; + tex.max_level = image_view->vk.base_mip_level + + image_view->vk.level_count - 1; + + tex.swizzle_r = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[0]); + tex.swizzle_g = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[1]); + tex.swizzle_b = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[2]); + tex.swizzle_a = v3d_translate_pipe_swizzle(image_view->planes[plane].swizzle[3]); + + tex.reverse_standard_border_color = image_view->planes[plane].channel_reverse; + + tex.texture_type = image_view->format->planes[plane].tex_type; + + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + tex.image_depth = image->vk.extent.depth; + } else { + tex.image_depth = image_view->vk.layer_count; + } + + /* Empirical testing with CTS shows that when we are sampling from cube + * arrays we want to set image depth to layers / 6, but not when doing + * image load/store. 
+ */ + if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && + !for_cube_map_array_storage) { + assert(tex.image_depth % 6 == 0); + tex.image_depth /= 6; + } + + tex.image_height = image->planes[iplane].height * msaa_scale; + tex.image_width = image->planes[iplane].width * msaa_scale; + + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). + */ + if (image->vk.image_type == VK_IMAGE_TYPE_1D) + tex.image_height = tex.image_width >> 14; + + tex.image_width &= (1 << 14) - 1; + tex.image_height &= (1 << 14) - 1; + + tex.array_stride_64_byte_aligned = image->planes[iplane].cube_map_stride / 64; + + tex.srgb = vk_format_is_srgb(image_view->vk.view_format); + + /* At this point we don't have the job. That's the reason the first + * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to + * add the bo to the job. This also means that we need to add manually + * the image bo to the job using the texture. + */ + const uint32_t base_offset = + image->planes[iplane].mem->bo->offset + + v3dv_layer_offset(image, 0, image_view->vk.base_array_layer, + iplane); + tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); } - tex.image_width &= (1 << 14) - 1; - tex.image_height &= (1 << 14) - 1; - - tex.array_stride_64_byte_aligned = image->cube_map_stride / 64; - - tex.srgb = vk_format_is_srgb(image_view->vk.view_format); - - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to - * add the bo to the job. This also means that we need to add manually - * the image bo to the job using the texture. 
- */ - const uint32_t base_offset = - image->mem->bo->offset + - v3dv_layer_offset(image, 0, image_view->vk.base_array_layer); - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); } } @@ -156,7 +160,8 @@ v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, tex.image_width &= (1 << 14) - 1; tex.image_height &= (1 << 14) - 1; - tex.texture_type = buffer_view->format->tex_type; + assert(buffer_view->format->plane_count == 1); + tex.texture_type = buffer_view->format->planes[0].tex_type; tex.srgb = vk_format_is_srgb(buffer_view->vk_format); /* At this point we don't have the job. That's the reason the first diff --git a/src/broadcom/vulkan/v3dvx_meta_common.c b/src/broadcom/vulkan/v3dvx_meta_common.c index 2d43766..04147b8 100644 --- a/src/broadcom/vulkan/v3dvx_meta_common.c +++ b/src/broadcom/vulkan/v3dvx_meta_common.c @@ -66,11 +66,17 @@ emit_rcl_prologue(struct v3dv_job *job, uint32_t clear_pad = 0; if (clear_info->image) { const struct v3dv_image *image = clear_info->image; + + /* From vkCmdClearColorImage: + * "image must not use any of the formats that require a sampler + * YCBCR conversion" + */ + assert(image->plane_count == 1); const struct v3d_resource_slice *slice = - &image->slices[clear_info->level]; + &image->planes[0].slices[clear_info->level]; if (slice->tiling == V3D_TILING_UIF_NO_XOR || slice->tiling == V3D_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->cpp) * 2; + int uif_block_height = v3d_utile_height(image->planes[0].cpp) * 2; uint32_t implicit_padded_height = align(tiling->height, uif_block_height) / uif_block_height; @@ -259,6 +265,9 @@ choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer, bool is_copy_to_buffer, bool is_copy_from_buffer) { + /* At this point the framebuffer was already lowered to single-plane */ + assert(framebuffer->format->plane_count == 1); + if (is_copy_to_buffer || is_copy_from_buffer) { switch (framebuffer->vk_format) { case VK_FORMAT_D16_UNORM: @@ -300,11 
+309,11 @@ choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer, } } default: /* Color formats */ - return framebuffer->format->rt_type; + return framebuffer->format->planes[0].rt_type; break; } } else { - return framebuffer->format->rt_type; + return framebuffer->format->planes[0].rt_type; } } @@ -312,7 +321,11 @@ static inline bool format_needs_rb_swap(struct v3dv_device *device, VkFormat format) { - const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); + /* We are calling these methods for framebuffer formats, that at this point + * should be single-plane + */ + assert(vk_format_get_plane_count(format) == 1); + const uint8_t *swizzle = v3dv_get_format_swizzle(device, format, 0); return v3dv_format_swizzle_needs_rb_swap(swizzle); } @@ -320,7 +333,11 @@ static inline bool format_needs_reverse(struct v3dv_device *device, VkFormat format) { - const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); + /* We are calling these methods for framebuffer formats, that at this point + * should be single-plane + */ + assert(vk_format_get_plane_count(format) == 1); + const uint8_t *swizzle = v3dv_get_format_swizzle(device, format, 0); return v3dv_format_swizzle_needs_reverse(swizzle); } @@ -335,22 +352,29 @@ emit_image_load(struct v3dv_device *device, bool is_copy_to_buffer, bool is_copy_from_buffer) { - uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + uint8_t plane = v3dv_plane_from_aspect(aspect); + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer, plane); + /* For multi-plane formats we are copying plane by plane to the color + * tlb. Framebuffer format was already selected to be a tlb single-plane + * compatible format. We still need to use the real plane to get the + * address etc from the source image. 
+ */ + assert(framebuffer->format->plane_count == 1); /* For image to/from buffer copies we always load to and store from RT0, * even for depth/stencil aspects, because the hardware can't do raster * stores or loads from/to the depth/stencil tile buffers. */ bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + image->format->plane_count > 1 || aspect == VK_IMAGE_ASPECT_COLOR_BIT; - const struct v3d_resource_slice *slice = &image->slices[mip_level]; + const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level]; cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { load.buffer_to_load = load_to_color_tlb ? RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); - load.address = v3dv_cl_address(image->mem->bo, layer_offset); - + load.address = v3dv_cl_address(image->planes[plane].mem->bo, layer_offset); load.input_image_format = choose_tlb_format(framebuffer, aspect, false, is_copy_to_buffer, is_copy_from_buffer); @@ -420,17 +444,28 @@ emit_image_store(struct v3dv_device *device, bool is_copy_from_buffer, bool is_multisample_resolve) { - uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + uint8_t plane = v3dv_plane_from_aspect(aspect); + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer, plane); + + /* + * For multi-plane formats we are copying plane by plane to the color + * tlb. Framebuffer format was already selected to be a tlb single-plane + * compatible format. We still need to use the real plane to get the + * address etc. + */ + assert(framebuffer->format->plane_count == 1); bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + image->format->plane_count > 1 || aspect == VK_IMAGE_ASPECT_COLOR_BIT; - const struct v3d_resource_slice *slice = &image->slices[mip_level]; + const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level]; cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { store.buffer_to_store = store_from_color_tlb ? 
RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); - store.address = v3dv_cl_address(image->mem->bo, layer_offset); + store.address = v3dv_cl_address(image->planes[plane].mem->bo, layer_offset); + store.clear_buffer_being_stored = false; /* See rationale in emit_image_load() */ @@ -527,9 +562,10 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job, * Vulkan spec states that the output buffer must have packed stencil * values, where each stencil value is 1 byte. */ + uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask); uint32_t cpp = region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? - 1 : image->cpp; + 1 : image->planes[plane].cpp; uint32_t buffer_stride = width * cpp; uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer_offset; @@ -845,7 +881,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, uint32_t src_cpp, uint32_t width, uint32_t height, - const struct v3dv_format *format) + const struct v3dv_format_plane *format_plane) { struct drm_v3d_submit_tfu tfu = { .ios = (height << 16) | width, @@ -864,7 +900,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, (src_tiling - V3D_TILING_LINEARTILE)) << V3D33_TFU_ICFG_FORMAT_SHIFT; } - tfu.icfg |= format->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT; + tfu.icfg |= format_plane->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT; tfu.ioa = dst_offset; @@ -1082,8 +1118,9 @@ emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job, width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format)); height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format)); + uint8_t plane = v3dv_plane_from_aspect(imgrsc->aspectMask); uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 
- 1 : image->cpp; + 1 : image->planes[plane].cpp; uint32_t buffer_stride = width * cpp; uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; diff --git a/src/broadcom/vulkan/v3dvx_pipeline.c b/src/broadcom/vulkan/v3dvx_pipeline.c index f96891c..ad904eb 100644 --- a/src/broadcom/vulkan/v3dvx_pipeline.c +++ b/src/broadcom/vulkan/v3dvx_pipeline.c @@ -84,7 +84,6 @@ pack_blend(struct v3dv_pipeline *pipeline, return; assert(pipeline->subpass->color_count == cb_info->attachmentCount); - pipeline->blend.needs_color_constants = false; uint32_t color_write_masks = 0; for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) { @@ -104,7 +103,12 @@ pack_blend(struct v3dv_pipeline *pipeline, VkAttachmentDescription2 *desc = &pipeline->pass->attachments[attachment_idx].desc; const struct v3dv_format *format = v3dX(get_format)(desc->format); - bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1); + + /* We only do blending with render pass attachments, so we should not have + * multiplanar images here + */ + assert(format->plane_count == 1); + bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1); uint8_t rt_mask = 1 << i; pipeline->blend.enables |= rt_mask; diff --git a/src/broadcom/vulkan/v3dvx_private.h b/src/broadcom/vulkan/v3dvx_private.h index 96d6610..c693952 100644 --- a/src/broadcom/vulkan/v3dvx_private.h +++ b/src/broadcom/vulkan/v3dvx_private.h @@ -243,7 +243,7 @@ v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, uint32_t src_cpp, uint32_t width, uint32_t height, - const struct v3dv_format *format); + const struct v3dv_format_plane *format_plane); void v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job, @@ -318,6 +318,6 @@ uint32_t v3dX(descriptor_bo_size)(VkDescriptorType type); uint32_t v3dX(max_descriptor_bo_size)(void); -uint32_t v3dX(combined_image_sampler_texture_state_offset)(void); +uint32_t v3dX(combined_image_sampler_texture_state_offset)(uint8_t plane); -uint32_t 
v3dX(combined_image_sampler_sampler_state_offset)(void); +uint32_t v3dX(combined_image_sampler_sampler_state_offset)(uint8_t plane); -- 2.7.4