vk2tu_single_stage(VkPipelineStageFlags2 vk_stage, bool dst)
{
if (vk_stage == VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT ||
- vk_stage == VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
+ vk_stage == VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT ||
+ vk_stage == VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT)
return TU_STAGE_CP;
if (vk_stage == VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT ||
#define A6XX_TEX_CONST_DWORDS 16
#define A6XX_TEX_SAMP_DWORDS 4
+/* We sample the fragment density map on the CPU, so technically the
+ * minimum/maximum texel size is arbitrary. However sizes smaller than the
+ * minimum tile width alignment of 32 are likely pointless, so we use that as
+ * the minimum value. For the maximum just pick a value larger than anyone
+ * would reasonably need.
+ */
+#define MIN_FDM_TEXEL_SIZE_LOG2 5
+#define MIN_FDM_TEXEL_SIZE (1u << MIN_FDM_TEXEL_SIZE_LOG2)
+#define MAX_FDM_TEXEL_SIZE_LOG2 10
+#define MAX_FDM_TEXEL_SIZE (1u << MAX_FDM_TEXEL_SIZE_LOG2)
+
#define TU_FROM_HANDLE(__tu_type, __name, __handle) \
VK_FROM_HANDLE(__tu_type, __name, __handle)
}
VKAPI_ATTR VkResult VKAPI_CALL
-tu_BindImageMemory2(VkDevice device,
+tu_BindImageMemory2(VkDevice _device,
uint32_t bindInfoCount,
const VkBindImageMemoryInfo *pBindInfos)
{
+ TU_FROM_HANDLE(tu_device, device, _device);
+
for (uint32_t i = 0; i < bindInfoCount; ++i) {
TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
if (mem) {
image->bo = mem->bo;
image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
+
+ if (image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
+ if (!mem->bo->map) {
+ VkResult result = tu_bo_map(device, mem->bo);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ image->map = (char *)mem->bo->map + pBindInfos[i].memoryOffset;
+ } else {
+ image->map = NULL;
+ }
} else {
image->bo = NULL;
+ image->map = NULL;
image->iova = 0;
}
}
if (physical_device->vk.supported_extensions.EXT_filter_cubic)
optimal |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT;
}
+
+ /* We sample on the CPU so we can technically support anything as long
+ * as it's floating point, but this restricts it to "reasonable" formats
+ * to use, which means two channels and not something weird like
+ * luminance-alpha.
+ */
+ if (util_format_is_float(format) &&
+ desc->nr_channels == 2 && desc->swizzle[0] == PIPE_SWIZZLE_X &&
+ desc->swizzle[1] == PIPE_SWIZZLE_Y) {
+ optimal |= VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT;
+ }
}
if (supported_color) {
layouts[2] = &image->layout[2];
}
+ vk_component_mapping_to_pipe_swizzle(pCreateInfo->components,
+ iview->swizzle);
+
struct fdl_view_args args = {};
args.iova = image->iova;
args.base_array_layer = range->baseArrayLayer;
ubwc_enabled = false;
}
+ /* Fragment density maps are sampled on the CPU and we don't support
+ * sampling tiled images on the CPU or UBWC at the moment.
+ */
+ if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
+ tile_mode = TILE6_LINEAR;
+ ubwc_enabled = false;
+ }
+
enum pipe_format format =
tu_vk_format_to_pipe_format(image->vk.format);
/* Whether a view of the image with an R8G8 format could be made. */
vk_object_free(&device->vk, pAllocator, view);
}
+
+/* Implements the operations described in "Fragment Density Map Operations."
+ *
+ * Reads one texel of the fragment density map view `fdm` through the image's
+ * CPU mapping (fdm->image->map) and, for each of `layers` layers, stores the
+ * reciprocal of the first two swizzled density components in areas[].
+ *
+ * fdm:     density map image view; its image must use TILE6_LINEAR (asserted
+ *          below) and its `map` pointer is dereferenced without a NULL check.
+ * x, y:    position used to select the texel; each is shifted right by the
+ *          per-axis log2 texel size computed below. Presumably framebuffer
+ *          pixel coordinates -- confirm against the callers.
+ * width, height: divided (rounding up) by the view extent to derive the log2
+ *          texel size per axis. Presumably the framebuffer size -- confirm
+ *          against the callers.
+ * layers:  number of entries written to `areas`; the source pointer advances
+ *          by fdm->view.layer_size between entries.
+ * areas:   output array with room for at least `layers` elements.
+ *
+ * The log2 texel size is clamped to
+ * [MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2] on both axes.
+ *
+ * NOTE(review): the texel index (x >> shift, y >> shift) is not checked
+ * against the view extent, and a density component of 0 is not special-cased
+ * before the 1.0f / density division -- confirm that callers and the image
+ * contents guarantee both.
+ */
+void
+tu_fragment_density_map_sample(const struct tu_image_view *fdm,
+ uint32_t x, uint32_t y,
+ uint32_t width, uint32_t height,
+ uint32_t layers,
+ struct tu_frag_area *areas)
+{
+ assert(fdm->image->layout[0].tile_mode == TILE6_LINEAR); /* the cpp/pitch addressing below is linear */
+
+ uint32_t fdm_shift_x = util_logbase2_ceil(DIV_ROUND_UP(width, fdm->vk.extent.width)); /* log2 texel width, before clamping */
+ uint32_t fdm_shift_y = util_logbase2_ceil(DIV_ROUND_UP(height, fdm->vk.extent.height)); /* log2 texel height, before clamping */
+
+ fdm_shift_x = CLAMP(fdm_shift_x, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
+ fdm_shift_y = CLAMP(fdm_shift_y, MIN_FDM_TEXEL_SIZE_LOG2, MAX_FDM_TEXEL_SIZE_LOG2);
+
+ uint32_t i = x >> fdm_shift_x; /* texel column */
+ uint32_t j = y >> fdm_shift_y; /* texel row */
+
+ unsigned cpp = fdm->image->layout[0].cpp;
+ unsigned pitch = fdm->view.pitch;
+
+ void *pixel = (char *)fdm->image->map + fdm->view.offset + cpp * i + pitch * j; /* address of the texel in the first layer */
+ for (unsigned i = 0; i < layers; i++) { /* NOTE: this `i` is the layer index and shadows the texel column `i` above */
+ float density_src[4], density[4];
+ util_format_unpack_rgba(fdm->view.format, density_src, pixel, 1); /* unpack a single texel */
+ pipe_swizzle_4f(density, density_src, fdm->swizzle); /* apply the view's component swizzle */
+ areas[i].width = 1.0f / density[0];
+ areas[i].height = 1.0f / density[1];
+
+ pixel = (char *)pixel + fdm->view.layer_size; /* same texel position in the next layer */
+ }
+}
struct tu_bo *bo;
uint64_t iova;
+ /* For fragment density map */
+ void *map;
+
uint32_t lrz_height;
uint32_t lrz_pitch;
uint32_t lrz_offset;
struct fdl6_view view;
+ unsigned char swizzle[4];
+
/* for d32s8 separate depth */
uint64_t depth_base_addr;
uint32_t depth_layer_size;
struct tu_device *device,
const VkBufferViewCreateInfo *pCreateInfo);
+struct tu_frag_area { /* per-layer result written by tu_fragment_density_map_sample() */
+ float width; /* 1.0f / first swizzled density component */
+ float height; /* 1.0f / second swizzled density component */
+};
+
+void
+tu_fragment_density_map_sample(const struct tu_image_view *fdm,
+ uint32_t x, uint32_t y,
+ uint32_t width, uint32_t height,
+ uint32_t layers, struct tu_frag_area *areas);
+
#endif /* TU_IMAGE_H */