if (!batch->framebuffer.gpu) {
unsigned size = (dev->quirks & MIDGARD_SFBD) ?
MALI_SINGLE_TARGET_FRAMEBUFFER_LENGTH :
- sizeof(struct mali_framebuffer);
+ MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64);
/* Tag the pointer */
if (!(dev->quirks & MIDGARD_SFBD))
- batch->framebuffer.gpu |= MALI_MFBD;
+ batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
}
return batch->framebuffer.gpu;
#include "pan_util.h"
#include "panfrost-quirks.h"
-static struct mali_rt_format
-panfrost_mfbd_format(struct pipe_surface *surf)
+
+/* Tells whether the MFBD built for this batch carries a ZS/CRC extension
+ * descriptor. That is the case when the single bound colour buffer is
+ * checksummed, or when a depth/stencil buffer is bound and the batch clears
+ * or draws depth/stencil. */
+static bool
+panfrost_mfbd_has_zs_crc_ext(struct panfrost_batch *batch)
+{
+ /* CRC side: only considered with exactly one colour buffer */
+ if (batch->key.nr_cbufs == 1) {
+ struct panfrost_resource *color =
+ pan_resource(batch->key.cbufs[0]->texture);
+
+ if (color->checksummed)
+ return true;
+ }
+
+ /* ZS side: a bound buffer only counts if the batch touches it */
+ bool zs_touched = (batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL;
+
+ return batch->key.zsbuf && zs_touched;
+}
+
+/* Returns the number of bytes needed for the batch's MFBD: the fixed
+ * multi-target framebuffer descriptor, the ZS/CRC extension when
+ * panfrost_mfbd_has_zs_crc_ext() reports one, and one render target
+ * descriptor per colour buffer (never fewer than one). */
+static unsigned
+panfrost_mfbd_size(struct panfrost_batch *batch)
+{
+ unsigned nr_rts = MAX2(batch->key.nr_cbufs, 1);
+ unsigned size = MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
+
+ if (panfrost_mfbd_has_zs_crc_ext(batch))
+ size += MALI_ZS_CRC_EXTENSION_LENGTH;
+
+ size += nr_rts * MALI_RENDER_TARGET_LENGTH;
+
+ return size;
+}
+
+/* Fills in the format-related fields of a render target descriptor for the
+ * given surface: the swizzle, the sRGB flag, and the internal/writeback
+ * format pair. 8-bit UNORM formats are handled first and return early;
+ * every other format goes through the switch on the linearized format. */
+static void
+panfrost_mfbd_rt_init_format(struct pipe_surface *surf,
+ struct MALI_RENDER_TARGET *rt)
{
/* Explode details on the format */
unsigned char swizzle[4];
panfrost_invert_swizzle(desc->swizzle, swizzle);
- /* Fill in accordingly, defaulting to 8-bit UNORM */
+ rt->swizzle = panfrost_translate_swizzle_4(swizzle);
- struct mali_rt_format fmt = {
- .unk1 = 0x4000000,
- .unk2 = 0x1,
- .nr_channels = MALI_POSITIVE(desc->nr_channels),
- .unk3 = 0x4,
- .flags = 0x2,
- .swizzle = panfrost_translate_swizzle_4(swizzle),
- .no_preload = true
- };
+ /* Fill in accordingly, defaulting to 8-bit UNORM */
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- fmt.flags |= MALI_MFBD_FORMAT_SRGB;
+ rt->srgb = true;
/* sRGB handled as a dedicated flag */
enum pipe_format linearized = util_format_linear(surf->format);
- /* If RGB, we're good to go */
- if (util_format_is_unorm8(desc))
- return fmt;
+ if (util_format_is_unorm8(desc)) {
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
+ switch (desc->nr_channels) {
+ case 1:
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8;
+ break;
+ case 2:
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8;
+ break;
+ case 3:
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8B8;
+ break;
+ case 4:
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8B8A8;
+ break;
+ default:
+ unreachable("Invalid number of channels");
+ }
+
+ /* If RGB, we're good to go */
+ return;
+ }
/* Set flags for alternative formats */
switch (linearized) {
case PIPE_FORMAT_B5G6R5_UNORM:
- fmt.unk1 = 0x14000000;
- fmt.nr_channels = MALI_POSITIVE(2);
- fmt.unk3 |= 0x1;
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G6B5A0;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R5G6B5;
break;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_R4G4B4A4_UNORM:
- fmt.unk1 = 0x10000000;
- fmt.unk3 = 0x5;
- fmt.nr_channels = MALI_POSITIVE(1);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R4G4B4A4;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R4G4B4A4;
break;
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
- fmt.unk1 = 0x08000000;
- fmt.unk3 = 0x6;
- fmt.nr_channels = MALI_POSITIVE(1);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R10G10B10A2;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R10G10B10A2;
break;
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_R5G5B5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
- fmt.unk1 = 0x18000000;
- fmt.unk3 = 0x7;
- fmt.nr_channels = MALI_POSITIVE(2);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G5B5A1;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R5G5B5A1;
break;
/* Generic 8-bit */
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
- fmt.unk1 = 0x80000000;
- fmt.unk3 = 0x0;
- fmt.nr_channels = MALI_POSITIVE(1);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW8;
break;
/* Generic 32-bit */
case PIPE_FORMAT_R32_UINT:
case PIPE_FORMAT_R32_SINT:
case PIPE_FORMAT_R10G10B10A2_UINT:
- fmt.unk1 = 0x88000000;
- fmt.unk3 = 0x0;
- fmt.nr_channels = MALI_POSITIVE(4);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW32;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW32;
break;
/* Generic 16-bit */
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16_UINT:
case PIPE_FORMAT_R16_SINT:
- fmt.unk1 = 0x84000000;
- fmt.unk3 = 0x0;
- fmt.nr_channels = MALI_POSITIVE(2);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW16;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW16;
break;
/* Generic 64-bit */
case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R16G16B16A16_SINT:
case PIPE_FORMAT_R16G16B16A16_UINT:
- fmt.unk1 = 0x8c000000;
- fmt.unk3 = 0x1;
- fmt.nr_channels = MALI_POSITIVE(2);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW64;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW64;
break;
/* Generic 128-bit */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32A32_SINT:
case PIPE_FORMAT_R32G32B32A32_UINT:
- fmt.unk1 = 0x90000000;
- fmt.unk3 = 0x1;
- fmt.nr_channels = MALI_POSITIVE(4);
+ rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW128;
+ rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW128;
break;
default:
unreachable("Invalid format rendering");
}
-
- return fmt;
-}
-
-
-static void
-panfrost_mfbd_clear(
- struct panfrost_batch *batch,
- struct mali_framebuffer *fb,
- struct mali_framebuffer_extra *fbx,
- struct mali_render_target *rts,
- unsigned rt_count)
-{
- struct panfrost_context *ctx = batch->ctx;
- struct pipe_context *gallium = (struct pipe_context *) ctx;
- struct panfrost_device *dev = pan_device(gallium->screen);
-
- for (unsigned i = 0; i < rt_count; ++i) {
- if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
- continue;
-
- rts[i].clear_color_1 = batch->clear_color[i][0];
- rts[i].clear_color_2 = batch->clear_color[i][1];
- rts[i].clear_color_3 = batch->clear_color[i][2];
- rts[i].clear_color_4 = batch->clear_color[i][3];
- }
-
- if (batch->clear & PIPE_CLEAR_DEPTH) {
- fb->clear_depth = batch->clear_depth;
- }
-
- if (batch->clear & PIPE_CLEAR_STENCIL) {
- fb->clear_stencil = batch->clear_stencil;
- }
-
- if (dev->quirks & IS_BIFROST) {
- fbx->clear_color_1 = batch->clear_color[0][0];
- fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
- }
}
+/* Points a render target descriptor at the memory backing `surf`. Selects the
+ * writeback MSAA mode, initializes the format fields through
+ * panfrost_mfbd_rt_init_format(), then programs the block format, base
+ * address and strides according to the resource modifier (linear, 16x16
+ * u-interleaved tiled, or AFBC).
+ *
+ * Block formats exist in two flavours: when (gpu_id >> 12) >= 7 the *_v7
+ * field is written with a MALI_BLOCK_FORMAT_V7_* value, otherwise the plain
+ * field is written with a MALI_BLOCK_FORMAT_* value. All three modifier
+ * branches must follow that same pairing. */
static void
-panfrost_mfbd_set_cbuf(
- struct mali_render_target *rt,
- struct pipe_surface *surf)
+panfrost_mfbd_rt_set_buf(struct pipe_surface *surf,
+ struct MALI_RENDER_TARGET *rt)
{
- struct panfrost_resource *rsrc = pan_resource(surf->texture);
struct panfrost_device *dev = pan_device(surf->context->screen);
- bool is_bifrost = dev->quirks & IS_BIFROST;
-
+ unsigned version = dev->gpu_id >> 12;
+ struct panfrost_resource *rsrc = pan_resource(surf->texture);
unsigned level = surf->u.tex.level;
unsigned first_layer = surf->u.tex.first_layer;
assert(surf->u.tex.last_layer == first_layer);
unsigned nr_samples = surf->texture->nr_samples;
unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0;
-
mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0);
- rt->format = panfrost_mfbd_format(surf);
-
if (layer_stride)
- rt->format.msaa = MALI_MSAA_LAYERED;
+ rt->writeback_msaa = MALI_MSAA_LAYERED;
else if (surf->nr_samples)
- rt->format.msaa = MALI_MSAA_AVERAGE;
+ rt->writeback_msaa = MALI_MSAA_AVERAGE;
else
- rt->format.msaa = MALI_MSAA_SINGLE;
+ rt->writeback_msaa = MALI_MSAA_SINGLE;
- /* Now, we set the modifier specific pieces */
+ panfrost_mfbd_rt_init_format(surf, rt);
+
+ /* Now, we set the modifier specific pieces */
if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) {
- if (is_bifrost) {
- rt->format.unk4 = 0x1;
- } else {
- rt->format.block = MALI_BLOCK_FORMAT_LINEAR;
- }
-
- rt->framebuffer = base;
- rt->framebuffer_stride = stride / 16;
- rt->layer_stride = layer_stride;
+ if (version >= 7)
+ rt->writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_LINEAR;
+ else
+ rt->writeback_block_format = MALI_BLOCK_FORMAT_LINEAR;
+
+ rt->writeback_base = base;
+ rt->writeback_row_stride = stride;
+ rt->writeback_surface_stride = layer_stride;
} else if (rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) {
- if (is_bifrost) {
- rt->format.unk3 |= 0x8;
- } else {
- rt->format.block = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
- }
-
- rt->framebuffer = base;
- rt->framebuffer_stride = stride;
- rt->layer_stride = layer_stride;
+ if (version >= 7)
+ rt->writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
+ else
+ rt->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
+
+ rt->writeback_base = base;
+ /* The tiled layout programs the slice stride scaled by 16 */
+ rt->writeback_row_stride = stride * 16;
+ rt->writeback_surface_stride = layer_stride;
} else if (drm_is_afbc(rsrc->modifier)) {
- rt->format.block = MALI_BLOCK_FORMAT_AFBC;
+ /* V7 enum values belong in the _v7 field, exactly as in the
+ * linear and tiled branches above. */
+ if (version >= 7)
+ rt->writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_AFBC;
+ else
+ rt->writeback_block_format = MALI_BLOCK_FORMAT_AFBC;
unsigned header_size = rsrc->slices[level].header_size;
- rt->framebuffer = base + header_size;
- rt->layer_stride = layer_stride;
- rt->afbc.metadata = base;
- rt->afbc.stride = 0;
- rt->afbc.flags = MALI_AFBC_FLAGS;
+ rt->afbc_header = base;
+ rt->afbc_chunk_size = 9;
+ rt->afbc_sparse = true;
+ rt->afbc_body = base + header_size;
+ rt->writeback_surface_stride = layer_stride;
if (rsrc->modifier & AFBC_FORMAT_MOD_YTR)
- rt->afbc.flags |= MALI_AFBC_YTR;
+ rt->afbc_yuv_transform_enable = true;
/* TODO: The blob sets this to something nonzero, but it's not
* clear what/how to calculate/if it matters */
- rt->framebuffer_stride = 0;
+ rt->afbc_body_size = 0;
} else {
unreachable("Invalid mod");
}
}
+/* Packs one RENDER_TARGET descriptor at `rtp`. With a surface, write and
+ * dithering are enabled and the buffer fields come from
+ * panfrost_mfbd_rt_set_buf(); without one, a placeholder with an R8G8B8A8
+ * internal format is emitted. In both cases `rt_offset` is stored as the
+ * internal buffer offset, and the clear colour is copied when the batch
+ * clears colour buffer `rt_idx`. */
static void
-panfrost_mfbd_set_zsbuf(
- struct mali_framebuffer *fb,
- struct mali_framebuffer_extra *fbx,
- struct pipe_surface *surf)
+panfrost_mfbd_emit_rt(struct panfrost_batch *batch,
+ void *rtp, struct pipe_surface *surf,
+ unsigned rt_offset, unsigned rt_idx)
{
- struct panfrost_device *dev = pan_device(surf->context->screen);
- bool is_bifrost = dev->quirks & IS_BIFROST;
- struct panfrost_resource *rsrc = pan_resource(surf->texture);
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+ unsigned version = dev->gpu_id >> 12;
+
+ pan_pack(rtp, RENDER_TARGET, rt) {
+ rt.clean_pixel_write_enable = true;
+ if (surf) {
+ rt.write_enable = true;
+ rt.dithering_enable = true;
+ rt.internal_buffer_offset = rt_offset;
+ panfrost_mfbd_rt_set_buf(surf, &rt);
+ } else {
+ rt.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
+ rt.internal_buffer_offset = rt_offset;
+ if (version >= 7) {
+ rt.writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
+ rt.dithering_enable = true;
+ }
+ }
- unsigned nr_samples = surf->texture->nr_samples;
- nr_samples = MAX2(nr_samples, 1);
+ if (batch->clear & (PIPE_CLEAR_COLOR0 << rt_idx)) {
+ rt.clear_color_0 = batch->clear_color[rt_idx][0];
+ rt.clear_color_1 = batch->clear_color[rt_idx][1];
+ rt.clear_color_2 = batch->clear_color[rt_idx][2];
+ rt.clear_color_3 = batch->clear_color[rt_idx][3];
+ }
+ }
+}
- fbx->zs_samples = MALI_POSITIVE(nr_samples);
+/* Picks the internal depth format for the batch: D24 when no depth/stencil
+ * surface is bound or the batch neither clears nor draws depth/stencil,
+ * otherwise whatever panfrost_get_z_internal_format() returns for the
+ * surface format. */
+static enum mali_z_internal_format
+get_z_internal_format(struct panfrost_batch *batch)
+{
+ struct pipe_surface *zs_surf = batch->key.zsbuf;
- unsigned level = surf->u.tex.level;
- unsigned first_layer = surf->u.tex.first_layer;
- assert(surf->u.tex.last_layer == first_layer);
+ /* Default to 24 bit depth if there's no surface. */
+ if (!zs_surf || !((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL))
+ return MALI_Z_INTERNAL_FORMAT_D24;
+
+ return panfrost_get_z_internal_format(zs_surf->format);
+}
+
+/* Fills the buffer-related fields of a ZS/CRC extension descriptor.
+ *
+ * CRC fields are set when the batch has exactly one colour buffer and its
+ * resource is checksummed. Depth/stencil fields are set when a zsbuf is
+ * bound and the batch clears or draws depth/stencil; otherwise the function
+ * returns before touching them. Block-format fields follow the same
+ * version >= 7 / *_v7 split as the render target code. */
+static void
+panfrost_mfbd_zs_crc_ext_set_bufs(struct panfrost_batch *batch,
+ struct MALI_ZS_CRC_EXTENSION *ext)
+{
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+ unsigned version = dev->gpu_id >> 12;
+
+ /* Checksumming only works with a single render target */
+ if (batch->key.nr_cbufs == 1) {
+ struct pipe_surface *c_surf = batch->key.cbufs[0];
+ struct panfrost_resource *rsrc = pan_resource(c_surf->texture);
+
+ if (rsrc->checksummed) {
+ unsigned level = c_surf->u.tex.level;
+ struct panfrost_slice *slice = &rsrc->slices[level];
+
+ ext->crc_row_stride = slice->checksum_stride;
+ if (slice->checksum_bo)
+ ext->crc_base = slice->checksum_bo->gpu;
+ else
+ ext->crc_base = rsrc->bo->gpu + slice->checksum_offset;
+
+ if ((batch->clear & PIPE_CLEAR_COLOR0) && version >= 7) {
+ ext->crc_clear_color = batch->clear_color[0][0] |
+ 0xc000000000000000 |
+ ((uint64_t)batch->clear_color[0][0] & 0xffff) << 32;
+ }
+ }
+ }
+
+ struct pipe_surface *zs_surf = batch->key.zsbuf;
+
+ if (!((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL))
+ zs_surf = NULL;
+
+ if (!zs_surf)
+ return;
+
+ struct panfrost_resource *rsrc = pan_resource(zs_surf->texture);
+ unsigned nr_samples = MAX2(zs_surf->texture->nr_samples, 1);
+ unsigned level = zs_surf->u.tex.level;
+ unsigned first_layer = zs_surf->u.tex.first_layer;
+ assert(zs_surf->u.tex.last_layer == first_layer);
mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0);
+ ext->zs_msaa = nr_samples > 1 ? MALI_MSAA_LAYERED : MALI_MSAA_SINGLE;
+
if (drm_is_afbc(rsrc->modifier)) {
/* The only Z/S format we can compress is Z24S8 or variants
* thereof (handled by the gallium frontend) */
- assert(panfrost_is_z24s8_variant(surf->format));
+ assert(panfrost_is_z24s8_variant(zs_surf->format));
unsigned header_size = rsrc->slices[level].header_size;
- fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE;
-
- fbx->flags_hi |= MALI_EXTRA_PRESENT;
- fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
- fbx->zs_block = MALI_BLOCK_FORMAT_AFBC;
-
- fbx->ds_afbc.depth_stencil = base + header_size;
- fbx->ds_afbc.depth_stencil_afbc_metadata = base;
- fbx->ds_afbc.depth_stencil_afbc_stride = 0;
-
- fbx->ds_afbc.flags = MALI_AFBC_FLAGS;
- fbx->ds_afbc.padding = 0x1000;
+ ext->zs_write_format = MALI_ZS_FORMAT_D24S8;
+ if (version >= 7)
+ ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_AFBC;
+ else
+ ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
+
+ ext->zs_afbc_header = base;
+ ext->zs_afbc_body = base + header_size;
+ ext->zs_afbc_body_size = 0x1000;
+ ext->zs_afbc_chunk_size = 9;
+ ext->zs_afbc_sparse = true;
} else {
- assert(rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
+ assert(rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
+ rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
/* TODO: Z32F(S8) support, which is always linear */
int stride = rsrc->slices[level].stride;
unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0;
- fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE;
- fbx->flags_hi |= MALI_EXTRA_PRESENT;
- fbx->flags_lo |= MALI_EXTRA_ZS;
-
- fbx->ds_linear.depth = base;
+ ext->zs_writeback_base = base;
+ ext->zs_writeback_row_stride = stride;
+ ext->zs_writeback_surface_stride = layer_stride;
if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) {
- fbx->zs_block = MALI_BLOCK_FORMAT_LINEAR;
- fbx->ds_linear.depth_stride = stride / 16;
- fbx->ds_linear.depth_layer_stride = layer_stride;
+ if (version >= 7)
+ ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_LINEAR;
+ else
+ ext->zs_block_format = MALI_BLOCK_FORMAT_LINEAR;
} else {
- if (is_bifrost) {
- /* XXX: Bifrost fields are different here */
- fbx->zs_block = 1;
- fbx->flags_hi |= 0x440;
- fbx->flags_lo |= 0x1;
- } else {
- fbx->zs_block = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
- }
-
- fbx->ds_linear.depth_stride = stride;
- fbx->ds_linear.depth_layer_stride = layer_stride;
+ ext->zs_writeback_row_stride *= 16;
+ if (version >= 7)
+ ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
+ else
+ ext->zs_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
}
- if (panfrost_is_z24s8_variant(surf->format)) {
- fbx->flags_lo |= 0x1;
- } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
- fbx->flags_lo |= 0xA;
- fb->mfbd_flags ^= 0x100;
- fb->mfbd_flags |= 0x200;
- } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
- fbx->flags_hi |= 0x40;
- fbx->flags_lo |= 0xA;
- fb->mfbd_flags ^= 0x100;
- fb->mfbd_flags |= 0x201;
+ switch (zs_surf->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ ext->zs_write_format = MALI_ZS_FORMAT_D24S8;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ ext->zs_write_format = MALI_ZS_FORMAT_D24X8;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ ext->zs_write_format = MALI_ZS_FORMAT_D32;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ /* Midgard/Bifrost support interleaved depth/stencil
+ * buffers, but we always treat them as multi-planar.
+ */
+ ext->zs_write_format = MALI_ZS_FORMAT_D32;
+ ext->s_write_format = MALI_S_FORMAT_S8;
struct panfrost_resource *stencil = rsrc->separate_stencil;
struct panfrost_slice stencil_slice = stencil->slices[level];
unsigned stencil_layer_stride = (nr_samples > 1) ? stencil_slice.size0 : 0;
- fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer, 0);
- fbx->ds_linear.stencil_stride = stencil_slice.stride;
- fbx->ds_linear.stencil_layer_stride = stencil_layer_stride;
+ ext->s_writeback_base = panfrost_get_texture_address(stencil, level, first_layer, 0);
+ ext->s_writeback_row_stride = stencil_slice.stride;
+ if (rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
+ ext->s_writeback_row_stride *= 16;
+ ext->s_writeback_surface_stride = stencil_layer_stride;
+ break;
+ default:
+ unreachable("Unsupported depth/stencil format.");
}
}
}
-/* Helper for sequential uploads used for MFBD */
-
-#define UPLOAD(dest, offset, src, max) { \
- size_t sz = sizeof(*src); \
- memcpy(dest.cpu + offset, src, sz); \
- assert((offset + sz) <= max); \
- offset += sz; \
-}
-
-static mali_ptr
-panfrost_mfbd_upload(struct panfrost_batch *batch,
- struct mali_framebuffer *fb,
- struct mali_framebuffer_extra *fbx,
- struct mali_render_target *rts,
- unsigned rt_count)
+/* Packs a ZS_CRC_EXTENSION descriptor at `extp`: sets
+ * zs_clean_pixel_write_enable and lets
+ * panfrost_mfbd_zs_crc_ext_set_bufs() fill in the buffer fields. */
+static void
+panfrost_mfbd_emit_zs_crc_ext(struct panfrost_batch *batch, void *extp)
{
- off_t offset = 0;
-
- /* There may be extra data stuck in the middle */
- bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
-
- /* Compute total size for transfer */
-
- size_t total_sz =
- sizeof(struct mali_framebuffer) +
- (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
- sizeof(struct mali_render_target) * 8;
-
- struct panfrost_transfer m_f_trans =
- panfrost_pool_alloc_aligned(&batch->pool, total_sz, 64);
-
- /* Do the transfer */
-
- UPLOAD(m_f_trans, offset, fb, total_sz);
-
- if (has_extra)
- UPLOAD(m_f_trans, offset, fbx, total_sz);
-
- for (unsigned c = 0; c < 8; ++c) {
- UPLOAD(m_f_trans, offset, &rts[c], total_sz);
+ pan_pack(extp, ZS_CRC_EXTENSION, ext) {
+ ext.zs_clean_pixel_write_enable = true;
+ panfrost_mfbd_zs_crc_ext_set_bufs(batch, &ext);
}
-
- /* Return pointer suitable for the fragment section */
- unsigned tag =
- MALI_MFBD |
- (has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
- (MALI_POSITIVE(rt_count) << 2);
-
- return m_f_trans.gpu | tag;
}
-#undef UPLOAD
-
/* Determines the # of bytes per pixel we need to reserve for a given format in
* the tilebuffer (compared to 128-bit budget, etc). Usually the same as the
* bytes per pixel of the format itself, but there are some special cases I
return desc->block.bits / 8;
}
-/* Determines whether a framebuffer uses too much tilebuffer space (requiring
- * us to scale up the tile at a performance penalty). This is conservative but
- * afaict you get 128-bits per pixel normally */
+/* Calculates the internal color buffer size and tile size based on the number
+ * of RT, the format and the number of pixels. If things do not fit in 4KB, we
+ * shrink the tile size to make it fit.
+ *
+ * The tile starts at 16x16 pixels. Every colour buffer contributes
+ * bytes-per-pixel * samples * tile pixels; both the total and the tile size
+ * are halved until the total is at most 4096 bytes.
+ *
+ * Returns the total size aligned up to 1024 bytes and stores the resulting
+ * tile size, in pixels, in *tile_size. A 4x4 tile (16 pixels) is the
+ * smallest legal result and must be accepted by the final assert.
+ */
static unsigned
-pan_tib_size(struct panfrost_batch *batch)
+pan_internal_cbuf_size(struct panfrost_batch *batch, unsigned *tile_size)
{
- unsigned size = 0;
+ unsigned total_size = 0;
+ *tile_size = 16 * 16;
for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb];
assert(surf);
- size += pan_bytes_per_pixel_tib(surf->format);
+
+ unsigned nr_samples = MAX3(surf->nr_samples, surf->texture->nr_samples, 1);
+ total_size += pan_bytes_per_pixel_tib(surf->format) *
+ nr_samples * (*tile_size);
}
- return size;
-}
+ /* We have a 4KB budget, let's reduce the tile size until it fits. */
+ while (total_size > 4096) {
+ total_size >>= 1;
+ *tile_size >>= 1;
+ }
-static unsigned
-pan_tib_shift(struct panfrost_batch *batch)
-{
- unsigned size = pan_tib_size(batch);
+ /* Align on 1k. */
+ total_size = ALIGN_POT(total_size, 1024);
- if (size > 128)
- return 4;
- else if (size > 64)
- return 5;
- else if (size > 32)
- return 6;
- else if (size > 16)
- return 7;
- else
- return 8;
+ /* Minimum tile size is 4x4. */
+ assert(*tile_size >= 4 * 4);
+ return total_size;
}
-static struct mali_framebuffer
-panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
+/* Packs the LOCAL_STORAGE section of the MFBD at `fb`. When the batch has a
+ * non-zero stack size, tls_size is the value returned by
+ * panfrost_get_stack_shift() and tls_base_pointer is the GPU address of the
+ * batch scratchpad BO. wls_instances is always set to
+ * MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM. */
+static void
+panfrost_mfbd_emit_local_storage(struct panfrost_batch *batch, void *fb)
{
- struct panfrost_context *ctx = batch->ctx;
- struct pipe_context *gallium = (struct pipe_context *) ctx;
- struct panfrost_device *dev = pan_device(gallium->screen);
-
- unsigned width = batch->key.width;
- unsigned height = batch->key.height;
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
- struct mali_framebuffer mfbd = {
- .width1 = MALI_POSITIVE(width),
- .height1 = MALI_POSITIVE(height),
- .width2 = MALI_POSITIVE(width),
- .height2 = MALI_POSITIVE(height),
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, LOCAL_STORAGE, ls) {
+ if (batch->stack_size) {
+ unsigned shift =
+ panfrost_get_stack_shift(batch->stack_size);
+ struct panfrost_bo *bo =
+ panfrost_batch_get_scratchpad(batch,
+ batch->stack_size,
+ dev->thread_tls_alloc,
+ dev->core_count);
+ ls.tls_size = shift;
+ ls.tls_base_pointer = bo->gpu;
+ }
- /* Configures tib size */
- .unk1 = (pan_tib_shift(batch) << 9) | 0x80,
+ ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+ }
+}
- .rt_count_1 = MALI_POSITIVE(MAX2(batch->key.nr_cbufs, 1)),
- .rt_count_2 = 4,
- };
+/* Emits the TILER section of the MFBD at `fb` through
+ * panfrost_emit_midg_tiler(), then packs a TILER_WEIGHTS section with no
+ * field assigned. */
+static void
+panfrost_mfbd_emit_midgard_tiler(struct panfrost_batch *batch, void *fb,
+ unsigned vertex_count)
+{
+ void *t = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER);
- if (dev->quirks & IS_BIFROST) {
- mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
- mfbd.tiler_meta = panfrost_batch_get_bifrost_tiler(batch, vertex_count);
- } else {
- struct mali_local_storage_packed lsp;
-
- pan_pack(&lsp, LOCAL_STORAGE, ls) {
- if (batch->stack_size) {
- unsigned shift =
- panfrost_get_stack_shift(batch->stack_size);
- struct panfrost_bo *bo =
- panfrost_batch_get_scratchpad(batch,
- batch->stack_size,
- dev->thread_tls_alloc,
- dev->core_count);
- ls.tls_size = shift;
- ls.tls_base_pointer = bo->gpu;
- }
+ panfrost_emit_midg_tiler(batch, t, vertex_count);
- ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
- }
- mfbd.shared_memory = lsp;
+ /* All weights set to 0, nothing to do here */
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w);
+}
- struct mali_midgard_tiler_packed t;
- panfrost_emit_midg_tiler(batch, &t, vertex_count);
- mfbd.tiler = t;
+/* Packs the BIFROST_PARAMETERS section of the MFBD at `fb`; the only field
+ * set is sample_locations, taken from panfrost_emit_sample_locations(). */
+static void
+panfrost_mfbd_emit_bifrost_parameters(struct panfrost_batch *batch, void *fb)
+{
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, params) {
+ params.sample_locations = panfrost_emit_sample_locations(batch);
}
+}
- return mfbd;
+/* Packs the BIFROST_TILER_POINTER section of the MFBD at `fb` with the
+ * address returned by panfrost_batch_get_bifrost_tiler(), then packs a
+ * BIFROST_PADDING section with no field assigned. */
+static void
+panfrost_mfbd_emit_bifrost_tiler(struct panfrost_batch *batch, void *fb,
+ unsigned vertex_count)
+{
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_TILER_POINTER, tiler) {
+ tiler.address = panfrost_batch_get_bifrost_tiler(batch, vertex_count);
+ }
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PADDING, padding);
}
+/* Writes the MFBD sections into the batch's pre-allocated framebuffer
+ * descriptor (batch->framebuffer.cpu).
+ *
+ * Emits, in order: the Bifrost parameters or the local storage section
+ * depending on the IS_BIFROST quirk, the common PARAMETERS section
+ * (dimensions, bounds, colour buffer allocation and effective tile size from
+ * pan_internal_cbuf_size(), tie-break rule, render target count), and
+ * finally the Bifrost or Midgard tiler section. `vertex_count` is passed
+ * through to the tiler emission helpers. */
void
panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{
- struct mali_framebuffer mfbd =
- panfrost_emit_mfbd(batch, vertex_count);
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+ void *fb = batch->framebuffer.cpu;
- memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
+ if (dev->quirks & IS_BIFROST)
+ panfrost_mfbd_emit_bifrost_parameters(batch, fb);
+ else
+ panfrost_mfbd_emit_local_storage(batch, fb);
+
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params) {
+ params.width = batch->key.width;
+ params.height = batch->key.height;
+ params.bound_max_x = batch->key.width - 1;
+ params.bound_max_y = batch->key.height - 1;
+ /* The helper returns the allocation size and writes the tile
+ * size through its out-parameter. */
+ params.color_buffer_allocation =
+ pan_internal_cbuf_size(batch, &params.effective_tile_size);
+ params.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
+ params.render_target_count = MAX2(batch->key.nr_cbufs, 1);
+ }
+
+ if (dev->quirks & IS_BIFROST)
+ panfrost_mfbd_emit_bifrost_tiler(batch, fb, vertex_count);
+ else
+ panfrost_mfbd_emit_midgard_tiler(batch, fb, vertex_count);
}
/* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
{
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
- bool is_bifrost = dev->quirks & IS_BIFROST;
+ unsigned vertex_count = has_draws;
+ struct panfrost_transfer t =
+ panfrost_pool_alloc_aligned(&batch->pool,
+ panfrost_mfbd_size(batch), 64);
+ void *fb = t.cpu, *zs_crc_ext, *rts;
+
+ if (panfrost_mfbd_has_zs_crc_ext(batch)) {
+ zs_crc_ext = fb + MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
+ rts = zs_crc_ext + MALI_ZS_CRC_EXTENSION_LENGTH;
+ } else {
+ zs_crc_ext = NULL;
+ rts = fb + MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
+ }
- struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
- struct mali_framebuffer_extra fbx = {0};
- struct mali_render_target rts[8] = {0};
+ /* When scanning out, the depth buffer is immediately invalidated, so
+ * we don't need to waste bandwidth writing it out. This can improve
+ * performance substantially (Z24X8_UNORM 1080p @ 60fps is 475 MB/s of
+ * memory bandwidth!).
+ *
+ * The exception is ReadPixels, but this is not supported on GLES so we
+ * can safely ignore it. */
- /* We always upload at least one dummy GL_NONE render target */
+ if (panfrost_batch_is_scanout(batch))
+ batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
- unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
+ if (zs_crc_ext) {
+ if (batch->key.zsbuf &&
+ MAX2(batch->key.zsbuf->nr_samples, batch->key.zsbuf->nr_samples) > 1)
+ batch->requirements |= PAN_REQ_MSAA;
- fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
- fb.mfbd_flags = 0x100;
+ panfrost_mfbd_emit_zs_crc_ext(batch, zs_crc_ext);
+ }
- panfrost_mfbd_clear(batch, &fb, &fbx, rts, rt_descriptors);
+ /* We always upload at least one dummy GL_NONE render target */
+
+ unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
/* Upload either the render target or a dummy GL_NONE target */
- unsigned offset = 0;
- unsigned tib_shift = pan_tib_shift(batch);
+ unsigned rt_offset = 0, tib_size;
+ unsigned internal_cbuf_size = pan_internal_cbuf_size(batch, &tib_size);
for (int cb = 0; cb < rt_descriptors; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb];
- unsigned rt_offset = offset << tib_shift;
+ void *rt = rts + (cb * MALI_RENDER_TARGET_LENGTH);
- if (surf && ((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb))) {
- if (MAX2(surf->nr_samples, surf->texture->nr_samples) > 1)
- batch->requirements |= PAN_REQ_MSAA;
+ if (!((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb)))
+ surf = NULL;
- panfrost_mfbd_set_cbuf(&rts[cb], surf);
+ panfrost_mfbd_emit_rt(batch, rt, surf, rt_offset, cb);
- offset += pan_bytes_per_pixel_tib(surf->format);
- } else {
- struct mali_rt_format null_rt = {
- .unk1 = 0x4000000,
- .no_preload = true
- };
-
- if (is_bifrost) {
- null_rt.flags = 0x2;
- null_rt.unk3 = 0x8;
- }
+ if (surf) {
+ if (MAX2(surf->nr_samples, surf->texture->nr_samples) > 1)
+ batch->requirements |= PAN_REQ_MSAA;
- rts[cb].format = null_rt;
- rts[cb].framebuffer = 0;
- rts[cb].framebuffer_stride = 0;
+ rt_offset += pan_bytes_per_pixel_tib(surf->format) * tib_size;
}
-
- /* TODO: Break out the field */
- rts[cb].format.unk1 |= rt_offset;
- }
-
- fb.rt_count_2 = MAX2(DIV_ROUND_UP(offset, 1 << (10 - tib_shift)), 1);
-
- if (batch->key.zsbuf && ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) {
- if (MAX2(batch->key.zsbuf->nr_samples, batch->key.zsbuf->nr_samples) > 1)
- batch->requirements |= PAN_REQ_MSAA;
-
- panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
}
- /* When scanning out, the depth buffer is immediately invalidated, so
- * we don't need to waste bandwidth writing it out. This can improve
- * performance substantially (Z24X8_UNORM 1080p @ 60fps is 475 MB/s of
- * memory bandwidth!).
- *
- * The exception is ReadPixels, but this is not supported on GLES so we
- * can safely ignore it. */
-
- if (panfrost_batch_is_scanout(batch))
- batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
+ if (dev->quirks & IS_BIFROST)
+ panfrost_mfbd_emit_bifrost_parameters(batch, fb);
+ else
+ panfrost_mfbd_emit_local_storage(batch, fb);
+
+ pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params) {
+ params.width = batch->key.width;
+ params.height = batch->key.height;
+ params.bound_max_x = batch->key.width - 1;
+ params.bound_max_y = batch->key.height - 1;
+ params.effective_tile_size = tib_size;
+ params.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
+ params.render_target_count = rt_descriptors;
+ params.z_internal_format = get_z_internal_format(batch);
+
+ if (batch->clear & PIPE_CLEAR_DEPTH)
+ params.z_clear = batch->clear_depth;
+ if (batch->clear & PIPE_CLEAR_STENCIL)
+ params.s_clear = batch->clear_stencil & 0xff;
+
+ params.color_buffer_allocation = internal_cbuf_size;
+
+ if (batch->requirements & PAN_REQ_MSAA) {
+ /* MSAA 4x */
+ params.sample_count = 4;
+ params.sample_pattern = MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
+ }
- /* Actualize the requirements */
+ if (batch->key.zsbuf &&
+ ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) {
+ params.z_write_enable = true;
+ if (batch->key.zsbuf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ params.s_write_enable = true;
+ }
- if (batch->requirements & PAN_REQ_MSAA) {
- /* XXX */
- fb.unk1 |= (1 << 4) | (1 << 1);
- fb.rt_count_2 = 4;
+ params.has_zs_crc_extension = !!zs_crc_ext;
}
- if (batch->requirements & PAN_REQ_DEPTH_WRITE)
- fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
-
- /* Checksumming only works with a single render target */
-
- if (batch->key.nr_cbufs == 1) {
- struct pipe_surface *surf = batch->key.cbufs[0];
- struct panfrost_resource *rsrc = pan_resource(surf->texture);
-
- if (rsrc->checksummed) {
- unsigned level = surf->u.tex.level;
- struct panfrost_slice *slice = &rsrc->slices[level];
+ if (dev->quirks & IS_BIFROST)
+ panfrost_mfbd_emit_bifrost_tiler(batch, fb, vertex_count);
+ else
+ panfrost_mfbd_emit_midgard_tiler(batch, fb, vertex_count);
- fb.mfbd_flags |= MALI_MFBD_EXTRA;
- fbx.flags_hi |= MALI_EXTRA_PRESENT;
- fbx.checksum_stride = slice->checksum_stride;
- if (slice->checksum_bo)
- fbx.checksum = slice->checksum_bo->gpu;
- else
- fbx.checksum = rsrc->bo->gpu + slice->checksum_offset;
- }
- }
+ /* Return pointer suitable for the fragment section */
+ unsigned tag =
+ MALI_FBD_TAG_IS_MFBD |
+ (zs_crc_ext ? MALI_FBD_TAG_HAS_ZS_RT : 0) |
+ (MALI_POSITIVE(rt_descriptors) << 2);
- return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors);
+ return t.gpu | tag;
}
* 4. Otherwise, set magic_divisor = m and extra_flags = 0.
*/
-#define FBD_MASK (~0x3f)
-
-/* MFBD, rather than SFBD */
-#define MALI_MFBD (0x1)
-
-/* ORed into an MFBD address to specify the fbx section is included */
-#define MALI_MFBD_TAG_EXTRA (0x2)
-
/* On Bifrost, these fields are the same between the vertex and tiler payloads.
* They also seem to be the same between Bifrost and Midgard. They're shared in
* fused payloads.
mali_ptr framebuffer;
} __attribute__((packed));
-/* Configures multisampling on Bifrost fragment jobs */
-
-struct bifrost_multisampling {
- u64 zero1;
- u64 zero2;
- mali_ptr sample_locations;
- u64 zero4;
-} __attribute__((packed));
-
-#define MALI_MFBD_FORMAT_SRGB (1 << 0)
-
-struct mali_rt_format {
- unsigned unk1 : 32;
- unsigned unk2 : 3;
-
- unsigned nr_channels : 2; /* MALI_POSITIVE */
-
- unsigned unk3 : 4;
- unsigned unk4 : 1;
- enum mali_block_format block : 2;
- enum mali_msaa msaa : 2;
- unsigned flags : 2;
-
- unsigned swizzle : 12;
-
- unsigned zero : 3;
-
- /* Disables MFBD preload. When this bit is set, the render target will
- * be cleared every frame. When this bit is clear, the hardware will
- * automatically wallpaper the render target back from main memory.
- * Unfortunately, MFBD preload is very broken on Midgard, so in
- * practice, this is a chicken bit that should always be set.
- * Discovered by accident, as all good chicken bits are. */
-
- unsigned no_preload : 1;
-} __attribute__((packed));
-
-/* Flags for afbc.flags and ds_afbc.flags */
-
-#define MALI_AFBC_FLAGS 0x10009
-
-/* Lossless RGB and RGBA colorspace transform */
-#define MALI_AFBC_YTR (1 << 17)
-
-struct mali_render_target {
- struct mali_rt_format format;
-
- u64 zero1;
-
- struct {
- /* Stuff related to ARM Framebuffer Compression. When AFBC is enabled,
- * there is an extra metadata buffer that contains 16 bytes per tile.
- * The framebuffer needs to be the same size as before, since we don't
- * know ahead of time how much space it will take up. The
- * framebuffer_stride is set to 0, since the data isn't stored linearly
- * anymore.
- *
- * When AFBC is disabled, these fields are zero.
- */
-
- mali_ptr metadata;
- u32 stride; // stride in units of tiles
- u32 flags; // = 0x20000
- } afbc;
-
- mali_ptr framebuffer;
-
- u32 zero2 : 4;
- u32 framebuffer_stride : 28; // in units of bytes, row to next
- u32 layer_stride; /* For multisample rendering */
-
- u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
- u32 clear_color_2; // always equal, but unclear function?
- u32 clear_color_3; // always equal, but unclear function?
- u32 clear_color_4; // always equal, but unclear function?
-} __attribute__((packed));
-
-/* An optional part of mali_framebuffer. It comes between the main structure
- * and the array of render targets. It must be included if any of these are
- * enabled:
- *
- * - Transaction Elimination
- * - Depth/stencil
- * - TODO: Anything else?
- */
-
-/* flags_hi */
-#define MALI_EXTRA_PRESENT (0x1)
-
-/* flags_lo */
-#define MALI_EXTRA_ZS (0x4)
-
-struct mali_framebuffer_extra {
- mali_ptr checksum;
- /* Each tile has an 8 byte checksum, so the stride is "width in tiles * 8" */
- u32 checksum_stride;
-
- unsigned flags_lo : 4;
- enum mali_block_format zs_block : 2;
-
- /* Number of samples in Z/S attachment, MALI_POSITIVE. So zero for
- * 1-sample (non-MSAA), 0x3 for MSAA 4x, etc */
- unsigned zs_samples : 4;
- unsigned flags_hi : 22;
-
- union {
- /* Note: AFBC is only allowed for 24/8 combined depth/stencil. */
- struct {
- mali_ptr depth_stencil_afbc_metadata;
- u32 depth_stencil_afbc_stride; // in units of tiles
- u32 flags;
-
- mali_ptr depth_stencil;
-
- u64 padding;
- } ds_afbc;
-
- struct {
- /* Depth becomes depth/stencil in case of combined D/S */
- mali_ptr depth;
- u32 depth_stride_zero : 4;
- u32 depth_stride : 28;
- u32 depth_layer_stride;
-
- mali_ptr stencil;
- u32 stencil_stride_zero : 4;
- u32 stencil_stride : 28;
- u32 stencil_layer_stride;
- } ds_linear;
- };
-
-
- u32 clear_color_1;
- u32 clear_color_2;
- u64 zero3;
-} __attribute__((packed));
-
-/* Flags for mfbd_flags */
-
-/* Enables writing depth results back to main memory (rather than keeping them
- * on-chip in the tile buffer and then discarding) */
-
-#define MALI_MFBD_DEPTH_WRITE (1 << 10)
-
-/* The MFBD contains the extra mali_framebuffer_extra section */
-
-#define MALI_MFBD_EXTRA (1 << 13)
-
-struct mali_framebuffer {
- union {
- struct mali_local_storage_packed shared_memory;
- struct bifrost_multisampling msaa;
- };
-
- /* 0x20 */
- u16 width1, height1;
- u32 zero3;
- u16 width2, height2;
- u32 unk1 : 19; // = 0x01000
- u32 rt_count_1 : 3; // off-by-one (use MALI_POSITIVE)
- u32 unk2 : 2; // = 0
- u32 rt_count_2 : 3; // no off-by-one
- u32 zero4 : 5;
- /* 0x30 */
- u32 clear_stencil : 8;
- u32 mfbd_flags : 24; // = 0x100
- float clear_depth;
-
- union {
- struct {
- struct mali_midgard_tiler_packed tiler;
- struct mali_midgard_tiler_weights_packed tiler_weights;
- };
- struct {
- mali_ptr tiler_meta;
- u32 zeros[16];
- };
- };
-
- /* optional: struct mali_framebuffer_extra extra */
- /* struct mali_render_target rts[] */
-} __attribute__((packed));
-
#endif /* __PANFROST_JOB_H__ */
}
}
-struct pandecode_flag_info {
- u64 flag;
- const char *name;
-};
-
-static void
-pandecode_log_decoded_flags(const struct pandecode_flag_info *flag_info,
- u64 flags)
-{
- bool decodable_flags_found = false;
-
- for (int i = 0; flag_info[i].name; i++) {
- if ((flags & flag_info[i].flag) != flag_info[i].flag)
- continue;
-
- if (!decodable_flags_found) {
- decodable_flags_found = true;
- } else {
- pandecode_log_cont(" | ");
- }
-
- pandecode_log_cont("%s", flag_info[i].name);
-
- flags &= ~flag_info[i].flag;
- }
-
- if (decodable_flags_found) {
- if (flags)
- pandecode_log_cont(" | 0x%" PRIx64, flags);
- } else {
- pandecode_log_cont("0x%" PRIx64, flags);
- }
-}
-
-#define FLAG_INFO(flag) { MALI_MFBD_FORMAT_##flag, "MALI_MFBD_FORMAT_" #flag }
-static const struct pandecode_flag_info mfbd_fmt_flag_info[] = {
- FLAG_INFO(SRGB),
- {}
-};
-#undef FLAG_INFO
-
-#define FLAG_INFO(flag) { MALI_AFBC_##flag, "MALI_AFBC_" #flag }
-static const struct pandecode_flag_info afbc_fmt_flag_info[] = {
- FLAG_INFO(YTR),
- {}
-};
-#undef FLAG_INFO
-
-#define FLAG_INFO(flag) { MALI_EXTRA_##flag, "MALI_EXTRA_" #flag }
-static const struct pandecode_flag_info mfbd_extra_flag_hi_info[] = {
- FLAG_INFO(PRESENT),
- {}
-};
-#undef FLAG_INFO
-
-#define FLAG_INFO(flag) { MALI_EXTRA_##flag, "MALI_EXTRA_" #flag }
-static const struct pandecode_flag_info mfbd_extra_flag_lo_info[] = {
- FLAG_INFO(ZS),
- {}
-};
-#undef FLAG_INFO
-
-#define FLAG_INFO(flag) { MALI_MFBD_##flag, "MALI_MFBD_" #flag }
-static const struct pandecode_flag_info mfbd_flag_info [] = {
- FLAG_INFO(DEPTH_WRITE),
- FLAG_INFO(EXTRA),
- {}
-};
-#undef FLAG_INFO
-
/* Midgard's tiler descriptor is embedded within the
* larger FBD */
DUMP_UNPACKED(MIDGARD_TILER_WEIGHTS, w, "Tiler Weights:\n");
}
-/* TODO: The Bifrost tiler is not understood at all yet */
-
-static void
-pandecode_bifrost_tiler_descriptor(const struct mali_framebuffer *fb)
-{
- pandecode_log(".tiler = {\n");
- pandecode_indent++;
-
- MEMORY_PROP(fb, tiler_meta);
-
- for (int i = 0; i < 16; i++) {
- if (fb->zeros[i] != 0) {
- pandecode_msg("XXX: tiler descriptor zero %d tripped, value %x\n",
- i, fb->zeros[i]);
- }
- }
-
- pandecode_log("},\n");
-
- pandecode_indent--;
- pandecode_log("}\n");
-
-}
-
/* Information about the framebuffer passed back for
* additional analysis */
}
static void
-pandecode_rt_format(struct mali_rt_format format)
+pandecode_render_target(uint64_t gpu_va, unsigned job_no, bool is_bifrost, unsigned gpu_id,
+ const struct MALI_MULTI_TARGET_FRAMEBUFFER_PARAMETERS *fb)
{
- pandecode_log(".format = {\n");
+ pandecode_log("Color Render Targets:\n");
pandecode_indent++;
- pandecode_prop("unk1 = 0x%" PRIx32, format.unk1);
- pandecode_prop("unk2 = 0x%" PRIx32, format.unk2);
- pandecode_prop("unk3 = 0x%" PRIx32, format.unk3);
- pandecode_prop("unk4 = 0x%" PRIx32, format.unk4);
-
- pandecode_prop("block = %s", mali_block_format_as_str(format.block));
-
- /* TODO: Map formats so we can check swizzles and print nicely */
- pandecode_log("swizzle");
- pandecode_swizzle(format.swizzle, MALI_RGBA8_UNORM);
- pandecode_log_cont(",\n");
-
- pandecode_prop("nr_channels = MALI_POSITIVE(%d)",
- (format.nr_channels + 1));
-
- pandecode_log(".flags = ");
- pandecode_log_decoded_flags(mfbd_fmt_flag_info, format.flags);
- pandecode_log_cont(",\n");
-
- pandecode_prop("msaa = %s", mali_msaa_as_str(format.msaa));
-
- /* In theory, the no_preload bit can be cleared to enable MFBD preload,
- * which is a faster hardware-based alternative to the wallpaper method
- * to preserve framebuffer contents across frames. In practice, MFBD
- * preload is buggy on Midgard, and so this is a chicken bit. If this
- * bit isn't set, most likely something broke unrelated to preload */
-
- if (!format.no_preload) {
- pandecode_msg("XXX: buggy MFBD preload enabled - chicken bit should be clear\n");
- pandecode_prop("no_preload = 0x%" PRIx32, format.no_preload);
+ for (int i = 0; i < (fb->render_target_count); i++) {
+ mali_ptr rt_va = gpu_va + i * MALI_RENDER_TARGET_LENGTH;
+ struct pandecode_mapped_memory *mem =
+ pandecode_find_mapped_gpu_mem_containing(rt_va);
+ const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, mem, (mali_ptr) rt_va);
+ DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
}
- if (format.zero)
- pandecode_prop("zero = 0x%" PRIx32, format.zero);
-
pandecode_indent--;
- pandecode_log("},\n");
+ pandecode_log("\n");
}
static void
-pandecode_render_target(uint64_t gpu_va, unsigned job_no, const struct mali_framebuffer *fb)
+pandecode_mfbd_bifrost_deps(const void *fb, int job_no)
{
- pandecode_log("struct mali_render_target rts_list_%"PRIx64"_%d[] = {\n", gpu_va, job_no);
- pandecode_indent++;
-
- for (int i = 0; i < (fb->rt_count_1 + 1); i++) {
- mali_ptr rt_va = gpu_va + i * sizeof(struct mali_render_target);
- struct pandecode_mapped_memory *mem =
- pandecode_find_mapped_gpu_mem_containing(rt_va);
- const struct mali_render_target *PANDECODE_PTR_VAR(rt, mem, (mali_ptr) rt_va);
-
- pandecode_log("{\n");
- pandecode_indent++;
-
- pandecode_rt_format(rt->format);
-
- if (rt->format.block == MALI_BLOCK_FORMAT_AFBC) {
- pandecode_log(".afbc = {\n");
- pandecode_indent++;
-
- char *a = pointer_as_memory_reference(rt->afbc.metadata);
- pandecode_prop("metadata = %s", a);
- free(a);
-
- pandecode_prop("stride = %d", rt->afbc.stride);
-
- pandecode_log(".flags = ");
- pandecode_log_decoded_flags(afbc_fmt_flag_info, rt->afbc.flags);
- pandecode_log_cont(",\n");
-
- pandecode_indent--;
- pandecode_log("},\n");
- } else if (rt->afbc.metadata || rt->afbc.stride || rt->afbc.flags) {
- pandecode_msg("XXX: AFBC disabled but AFBC field set (0x%lX, 0x%x, 0x%x)\n",
- rt->afbc.metadata,
- rt->afbc.stride,
- rt->afbc.flags);
- }
-
- MEMORY_PROP(rt, framebuffer);
- pandecode_prop("framebuffer_stride = %d", rt->framebuffer_stride);
+ pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, params);
+
+ /* A descriptor without sample locations has nothing to dump; report
+ * it instead of fetching GPU memory through a null pointer. */
+ if (!params.sample_locations) {
+ pandecode_msg("XXX: sample_locations missing\n");
+ return;
+ }
+
+ /* The blob stores all possible sample locations in a single buffer
+ * allocated on startup, and just switches the pointer when switching
+ * MSAA state. For now, we just put the data into the cmdstream, but we
+ * should do something like what the blob does with a real driver.
+ *
+ * There seem to be 32 slots for sample locations, followed by another
+ * 16. The second 16 is just the center location followed by 15 zeros
+ * in all the cases I've identified (maybe shader vs. depth/color
+ * samples?).
+ */
- if (rt->layer_stride)
- pandecode_prop("layer_stride = %d", rt->layer_stride);
+ struct pandecode_mapped_memory *smem =
+ pandecode_find_mapped_gpu_mem_containing(params.sample_locations);
- if (rt->clear_color_1 | rt->clear_color_2 | rt->clear_color_3 | rt->clear_color_4) {
- pandecode_prop("clear_color_1 = 0x%" PRIx32, rt->clear_color_1);
- pandecode_prop("clear_color_2 = 0x%" PRIx32, rt->clear_color_2);
- pandecode_prop("clear_color_3 = 0x%" PRIx32, rt->clear_color_3);
- pandecode_prop("clear_color_4 = 0x%" PRIx32, rt->clear_color_4);
- }
-
- if (rt->zero1 || rt->zero2) {
- pandecode_msg("XXX: render target zeros tripped\n");
- pandecode_prop("zero1 = 0x%" PRIx64, rt->zero1);
- pandecode_prop("zero2 = 0x%" PRIx32, rt->zero2);
- }
+ const u16 *PANDECODE_PTR_VAR(samples, smem, params.sample_locations);
- pandecode_indent--;
- pandecode_log("},\n");
+ pandecode_log("uint16_t sample_locations_%d[] = {\n", job_no);
+ pandecode_indent++;
+ for (int i = 0; i < 32 + 16; i++) {
+ pandecode_log("%d, %d,\n", samples[2 * i], samples[2 * i + 1]);
}
pandecode_indent--;
}
static struct pandecode_fbd
-pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_compute, bool is_bifrost)
+pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_compute, bool is_bifrost, unsigned gpu_id)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
- const struct mali_framebuffer *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
+ const void *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
+ pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params);
struct pandecode_fbd info;
- if (is_bifrost && fb->msaa.sample_locations) {
- /* The blob stores all possible sample locations in a single buffer
- * allocated on startup, and just switches the pointer when switching
- * MSAA state. For now, we just put the data into the cmdstream, but we
- * should do something like what the blob does with a real driver.
- *
- * There seem to be 32 slots for sample locations, followed by another
- * 16. The second 16 is just the center location followed by 15 zeros
- * in all the cases I've identified (maybe shader vs. depth/color
- * samples?).
- */
-
- struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(fb->msaa.sample_locations);
-
- const u16 *PANDECODE_PTR_VAR(samples, smem, fb->msaa.sample_locations);
-
- pandecode_log("uint16_t sample_locations_%d[] = {\n", job_no);
- pandecode_indent++;
-
- for (int i = 0; i < 32 + 16; i++) {
- pandecode_log("%d, %d,\n", samples[2 * i], samples[2 * i + 1]);
- }
-
- pandecode_indent--;
- pandecode_log("};\n");
- }
+ if (is_bifrost)
+ pandecode_mfbd_bifrost_deps(fb, job_no);
- pandecode_log("struct mali_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
+ pandecode_log("Multi-Target Framebuffer:\n");
pandecode_indent++;
if (is_bifrost) {
- pandecode_log(".msaa = {\n");
- pandecode_indent++;
-
- if (fb->msaa.sample_locations)
- pandecode_prop("sample_locations = sample_locations_%d", job_no);
- else
- pandecode_msg("XXX: sample_locations missing\n");
-
- if (fb->msaa.zero1 || fb->msaa.zero2 || fb->msaa.zero4) {
- pandecode_msg("XXX: multisampling zero tripped\n");
- pandecode_prop("zero1 = %" PRIx64, fb->msaa.zero1);
- pandecode_prop("zero2 = %" PRIx64, fb->msaa.zero2);
- pandecode_prop("zero4 = %" PRIx64, fb->msaa.zero4);
- }
-
- pandecode_indent--;
- pandecode_log("},\n");
+ DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, fb, "Bifrost Params:\n");
} else {
- struct mali_local_storage_packed ls = fb->shared_memory;
- DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n");
+ DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
}
- info.width = fb->width1 + 1;
- info.height = fb->height1 + 1;
- info.rt_count = fb->rt_count_1 + 1;
-
- pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
- pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
- pandecode_prop("width2 = MALI_POSITIVE(%d)", fb->width2 + 1);
- pandecode_prop("height2 = MALI_POSITIVE(%d)", fb->height2 + 1);
-
- pandecode_prop("unk1 = 0x%x", fb->unk1);
- pandecode_prop("unk2 = 0x%x", fb->unk2);
- pandecode_prop("rt_count_1 = MALI_POSITIVE(%d)", fb->rt_count_1 + 1);
- pandecode_prop("rt_count_2 = %d", fb->rt_count_2);
+ info.width = params.width;
+ info.height = params.height;
+ info.rt_count = params.render_target_count;
+ DUMP_UNPACKED(MULTI_TARGET_FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
- pandecode_log(".mfbd_flags = ");
- pandecode_log_decoded_flags(mfbd_flag_info, fb->mfbd_flags);
- pandecode_log_cont(",\n");
-
- if (fb->clear_stencil)
- pandecode_prop("clear_stencil = 0x%x", fb->clear_stencil);
-
- if (fb->clear_depth)
- pandecode_prop("clear_depth = %f", fb->clear_depth);
-
- if (!is_compute)
- if (is_bifrost)
- pandecode_bifrost_tiler_descriptor(fb);
- else {
- const struct mali_midgard_tiler_packed t = fb->tiler;
- const struct mali_midgard_tiler_weights_packed w = fb->tiler_weights;
- pandecode_midgard_tiler_descriptor(&t, &w, fb->width1 + 1, fb->height1 + 1, is_fragment, true);
+ if (!is_compute) {
+ if (is_bifrost) {
+ DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, BIFROST_TILER_POINTER, fb, "Tiler Pointer:\n");
+ } else {
+ const void *t = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER);
+ const void *w = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS);
+ pandecode_midgard_tiler_descriptor(t, w, params.width, params.height, is_fragment, true);
}
- else
+ } else {
pandecode_msg("XXX: skipping compute MFBD, fixme\n");
+ }
- if (fb->zero3 || fb->zero4) {
- pandecode_msg("XXX: framebuffer zeros tripped\n");
- pandecode_prop("zero3 = 0x%" PRIx32, fb->zero3);
- pandecode_prop("zero4 = 0x%" PRIx32, fb->zero4);
+ if (is_bifrost) {
+ pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PADDING, padding);
}
pandecode_indent--;
- pandecode_log("};\n");
+ pandecode_log("\n");
- gpu_va += sizeof(struct mali_framebuffer);
+ gpu_va += MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
- info.has_extra = (fb->mfbd_flags & MALI_MFBD_EXTRA) && is_fragment;
+ info.has_extra = params.has_zs_crc_extension;
if (info.has_extra) {
- mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
- const struct mali_framebuffer_extra *PANDECODE_PTR_VAR(fbx, mem, (mali_ptr) gpu_va);
-
- pandecode_log("struct mali_framebuffer_extra fb_extra_%"PRIx64"_%d = {\n", gpu_va, job_no);
- pandecode_indent++;
-
- MEMORY_PROP(fbx, checksum);
-
- if (fbx->checksum_stride)
- pandecode_prop("checksum_stride = %d", fbx->checksum_stride);
-
- pandecode_log(".flags_hi = ");
- pandecode_log_decoded_flags(mfbd_extra_flag_hi_info, fbx->flags_hi);
- pandecode_log_cont(",\n");
-
- pandecode_log(".flags_lo = ");
- pandecode_log_decoded_flags(mfbd_extra_flag_lo_info, fbx->flags_lo);
- pandecode_log_cont(",\n");
-
- pandecode_prop("zs_block = %s", mali_block_format_as_str(fbx->zs_block));
- pandecode_prop("zs_samples = MALI_POSITIVE(%u)", fbx->zs_samples + 1);
-
- if (fbx->zs_block == MALI_BLOCK_FORMAT_AFBC) {
- pandecode_log(".ds_afbc = {\n");
- pandecode_indent++;
-
- MEMORY_PROP_DIR(fbx->ds_afbc, depth_stencil_afbc_metadata);
- pandecode_prop("depth_stencil_afbc_stride = %d",
- fbx->ds_afbc.depth_stencil_afbc_stride);
- MEMORY_PROP_DIR(fbx->ds_afbc, depth_stencil);
-
- pandecode_log(".flags = ");
- pandecode_log_decoded_flags(afbc_fmt_flag_info, fbx->ds_afbc.flags);
- pandecode_log_cont(",\n");
-
- if (fbx->ds_afbc.padding) {
- pandecode_msg("XXX: Depth/stencil AFBC zeros tripped\n");
- pandecode_prop("padding = 0x%" PRIx64, fbx->ds_afbc.padding);
- }
-
- pandecode_indent--;
- pandecode_log("},\n");
- } else {
- pandecode_log(".ds_linear = {\n");
- pandecode_indent++;
-
- if (fbx->ds_linear.depth) {
- MEMORY_PROP_DIR(fbx->ds_linear, depth);
- pandecode_prop("depth_stride = %d",
- fbx->ds_linear.depth_stride);
- pandecode_prop("depth_layer_stride = %d",
- fbx->ds_linear.depth_layer_stride);
- } else if (fbx->ds_linear.depth_stride || fbx->ds_linear.depth_layer_stride) {
- pandecode_msg("XXX: depth stride zero tripped %d %d\n", fbx->ds_linear.depth_stride, fbx->ds_linear.depth_layer_stride);
- }
-
- if (fbx->ds_linear.stencil) {
- MEMORY_PROP_DIR(fbx->ds_linear, stencil);
- pandecode_prop("stencil_stride = %d",
- fbx->ds_linear.stencil_stride);
- pandecode_prop("stencil_layer_stride = %d",
- fbx->ds_linear.stencil_layer_stride);
- } else if (fbx->ds_linear.stencil_stride || fbx->ds_linear.stencil_layer_stride) {
- pandecode_msg("XXX: stencil stride zero tripped %d %d\n", fbx->ds_linear.stencil_stride, fbx->ds_linear.stencil_layer_stride);
- }
-
- if (fbx->ds_linear.depth_stride_zero ||
- fbx->ds_linear.stencil_stride_zero) {
- pandecode_msg("XXX: Depth/stencil zeros tripped\n");
- pandecode_prop("depth_stride_zero = 0x%x",
- fbx->ds_linear.depth_stride_zero);
- pandecode_prop("stencil_stride_zero = 0x%x",
- fbx->ds_linear.stencil_stride_zero);
- }
-
- pandecode_indent--;
- pandecode_log("},\n");
- }
-
- if (fbx->clear_color_1 | fbx->clear_color_2) {
- pandecode_prop("clear_color_1 = 0x%" PRIx32, fbx->clear_color_1);
- pandecode_prop("clear_color_2 = 0x%" PRIx32, fbx->clear_color_2);
- }
-
- if (fbx->zero3) {
- pandecode_msg("XXX: fb_extra zeros tripped\n");
- pandecode_prop("zero3 = 0x%" PRIx64, fbx->zero3);
- }
-
- pandecode_indent--;
- pandecode_log("};\n");
+ struct pandecode_mapped_memory *mem =
+ pandecode_find_mapped_gpu_mem_containing(gpu_va);
+ const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, mem, (mali_ptr)gpu_va);
+ DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
+ pandecode_log("\n");
- gpu_va += sizeof(struct mali_framebuffer_extra);
+ gpu_va += MALI_ZS_CRC_EXTENSION_LENGTH;
}
if (is_fragment)
- pandecode_render_target(gpu_va, job_no, fb);
+ pandecode_render_target(gpu_va, job_no, is_bifrost, gpu_id, ¶ms);
return info;
}
if (is_bifrost)
pandecode_compute_fbd(p->shared & ~1, job_no);
- else if (p->shared & MALI_MFBD)
- fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->shared) & FBD_MASK, job_no, false, job_type == MALI_JOB_TYPE_COMPUTE, false);
+ else if (p->shared & MALI_FBD_TAG_IS_MFBD)
+ fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->shared) & ~MALI_FBD_TAG_MASK,
+ job_no, false, job_type == MALI_JOB_TYPE_COMPUTE, is_bifrost, gpu_id);
else if (job_type == MALI_JOB_TYPE_COMPUTE)
pandecode_compute_fbd((u64) (uintptr_t) p->shared, job_no);
else
/* MRT blend fields are used whenever MFBD is used, with
* per-RT descriptors */
- if (job_type == MALI_JOB_TYPE_TILER && (is_bifrost || p->shared & MALI_MFBD)) {
+ if (job_type == MALI_JOB_TYPE_TILER &&
+ (is_bifrost || p->shared & MALI_FBD_TAG_IS_MFBD)) {
void* blend_base = ((void *) cl) + MALI_STATE_LENGTH;
for (unsigned i = 0; i < fbd_info.rt_count; i++) {
{
const struct mali_payload_fragment *PANDECODE_PTR_VAR(s, mem, payload);
- bool is_mfbd = s->framebuffer & MALI_MFBD;
+ bool is_mfbd = s->framebuffer & MALI_FBD_TAG_IS_MFBD;
if (!is_mfbd && is_bifrost)
pandecode_msg("XXX: Bifrost fragment must use MFBD\n");
struct pandecode_fbd info;
if (is_mfbd)
- info = pandecode_mfbd_bfr(s->framebuffer & FBD_MASK, job_no, true, false, is_bifrost);
+ info = pandecode_mfbd_bfr(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no,
+ true, false, is_bifrost, gpu_id);
else
- info = pandecode_sfbd(s->framebuffer & FBD_MASK, job_no, true, gpu_id);
+ info = pandecode_sfbd(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no,
+ true, gpu_id);
/* Compute the tag for the tagged pointer. This contains the type of
* FBD (MFBD/SFBD), and in the case of an MFBD, information about which
* additional structures follow the MFBD header (an extra payload or
* not, as well as a count of render targets) */
- unsigned expected_tag = is_mfbd ? MALI_MFBD : 0;
+ unsigned expected_tag = is_mfbd ? MALI_FBD_TAG_IS_MFBD : 0;
if (is_mfbd) {
if (info.has_extra)
- expected_tag |= MALI_MFBD_TAG_EXTRA;
+ expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;
expected_tag |= (MALI_POSITIVE(info.rt_count) << 2);
}
/* The FBD is a tagged pointer */
- unsigned tag = (s->framebuffer & ~FBD_MASK);
+ unsigned tag = (s->framebuffer & MALI_FBD_TAG_MASK);
if (tag != expected_tag)
pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag);
<value name="AFBC" value="3"/>
</enum>
+ <!-- Alternative block format encoding used by the "... v7" descriptor fields.
+      The two AFBC values (12 and 13) need 4 bits to be represented, so a
+      field of this type narrower than 4 bits can only hold values 0..3.
+      NOTE(review): presumably this is the encoding for v7 hardware; confirm. -->
+ <enum name="Block Format v7">
+ <value name="No Write" value="0"/>
+ <value name="Tiled U-Interleaved" value="1"/>
+ <value name="Linear" value="2"/>
+ <value name="AFBC" value="12"/>
+ <value name="AFBC Tiled" value="13"/>
+ </enum>
+
<enum name="Mipmap Mode">
<value name="Nearest" value="0"/>
<value name="None" value="1"/>
<value name="A1B5G5R5" value="29"/>
</enum>
+ <!-- Color formats for the 5-bit "Writeback Format" and "Preload Format"
+      fields of the Render Target descriptor. Values 0..15 are the RAW sizes,
+      16..29 are channel layouts and 31 is NATIVE; 23, 26, 27 and 30 are not
+      assigned here. -->
+ <enum name="MFBD Color Format">
+ <value name="RAW8" value="0"/>
+ <value name="RAW16" value="1"/>
+ <value name="RAW24" value="2"/>
+ <value name="RAW32" value="3"/>
+ <value name="RAW48" value="4"/>
+ <value name="RAW64" value="5"/>
+ <value name="RAW96" value="6"/>
+ <value name="RAW128" value="7"/>
+ <value name="RAW192" value="8"/>
+ <value name="RAW256" value="9"/>
+ <value name="RAW384" value="10"/>
+ <value name="RAW512" value="11"/>
+ <value name="RAW768" value="12"/>
+ <value name="RAW1024" value="13"/>
+ <value name="RAW1536" value="14"/>
+ <value name="RAW2048" value="15"/>
+ <value name="R8" value="16"/>
+ <value name="R8G8" value="17"/>
+ <value name="R8G8B8" value="18"/>
+ <value name="R8G8B8A8" value="19"/>
+ <value name="R4G4B4A4" value="20"/>
+ <value name="R5G6B5" value="21"/>
+ <value name="R8G8B8_FROM_R8G8B8A2" value="22"/>
+ <value name="R10G10B10A2" value="24"/>
+ <value name="A2B10G10R10" value="25"/>
+ <value name="R5G5B5A1" value="28"/>
+ <value name="A1B5G5R5" value="29"/>
+ <value name="NATIVE" value="31"/>
+ </enum>
+
<enum name="Downsampling Accumulation Mode">
<value name="Unsigned normalized integer" value="0"/>
<value name="Signed normalized integer" value="1"/>
<value name="D32_S8X24" value="15"/>
</enum>
+ <!-- Type of the 4-bit "ZS Preload Format" field of the ZS CRC Extension.
+      Only a single value is defined so far. -->
+ <enum name="ZS Preload Format">
+ <value name="D32_S8X24" value="4"/>
+ </enum>
+
<enum name="S Format">
<value name="S8" value="1"/>
<value name="S8X8" value="2"/>
<value name="D3D 16x Grid" value="4"/>
</enum>
+ <!-- Type of the 2-bit "Z Internal Format" field of the Multi-Target
+      Framebuffer Parameters; panfrost_get_z_internal_format() produces these
+      values from a pipe_format. Value 3 is not assigned. -->
+ <enum name="Z Internal Format">
+ <value name="D16" value="0"/>
+ <value name="D24" value="1"/>
+ <value name="D32" value="2"/>
+ </enum>
+
+ <!-- Tag bits carried in the low bits of a framebuffer descriptor pointer.
+      MASK (63 = 0x3f) covers the low 6 bits; the decoder clears them with
+      ~MALI_FBD_TAG_MASK before using the address. IS_MFBD and HAS_ZS_RT are
+      bits 0 and 1; the decoder expects (render target count - 1) shifted
+      left by 2 in the remaining tag bits. -->
+ <enum name="FBD Tag">
+ <value name="IS_MFBD" value="1"/>
+ <value name="HAS_ZS_RT" value="2"/>
+ <value name="MASK" value="63"/>
+ </enum>
+
+ <!-- Parameters section of the multi-target framebuffer descriptor, six
+      32-bit words (0..5):
+        words 0..2: dimensions and the min/max bound, 16 bits per coordinate
+        word 3:     sampling, tile size, downsampling scales, render target
+                    count and color buffer allocation (bit 23 is not described)
+        word 4:     stencil controls (bits 0..10), depth controls and the
+                    "Has ZS CRC Extension" flag (bits 16..21), CRC enables
+                    (bits 30..31); bits 11..15 and 22..29 are not described
+        word 5:     depth clear value as a float
+      Width, Height and Render Target Count carry the minus(1) modifier,
+      Sample Count and Effective Tile Size the log2 modifier, and Color Buffer
+      Allocation the shr(10) modifier. -->
+ <struct name="Multi-Target Framebuffer Parameters">
+ <field name="Width" size="16" start="0:0" type="uint" modifier="minus(1)"/>
+ <field name="Height" size="16" start="0:16" type="uint" modifier="minus(1)"/>
+ <field name="Bound Min X" size="16" start="1:0" type="uint"/>
+ <field name="Bound Min Y" size="16" start="1:16" type="uint"/>
+ <field name="Bound Max X" size="16" start="2:0" type="uint"/>
+ <field name="Bound Max Y" size="16" start="2:16" type="uint"/>
+ <field name="Sample Count" size="3" start="3:0" type="uint" default="1" modifier="log2"/>
+ <field name="Sample Pattern" size="3" start="3:3" type="Sample Pattern"/>
+ <field name="Tie-Break Rule" size="3" start="3:6" type="Tie-Break Rule"/>
+ <field name="Effective Tile Size" size="4" start="3:9" type="uint" modifier="log2"/>
+ <field name="X Downsampling Scale" size="3" start="3:13" type="uint"/>
+ <field name="Y Downsampling Scale" size="3" start="3:16" type="uint"/>
+ <field name="Render Target Count" size="4" start="3:19" type="uint" modifier="minus(1)"/>
+ <field name="Color Buffer Allocation" size="8" start="3:24" type="uint" modifier="shr(10)"/>
+ <field name="S Clear" size="8" start="4:0" type="uint"/>
+ <field name="S Write Enable" size="1" start="4:8" type="bool"/>
+ <field name="S Preload Enable" size="1" start="4:9" type="bool"/>
+ <field name="S Unload Enable" size="1" start="4:10" type="bool"/>
+ <field name="Z Internal Format" size="2" start="4:16" type="Z Internal Format"/>
+ <field name="Z Write Enable" size="1" start="4:18" type="bool"/>
+ <field name="Z Preload Enable" size="1" start="4:19" type="bool"/>
+ <field name="Z Unload Enable" size="1" start="4:20" type="bool"/>
+ <field name="Has ZS CRC Extension" size="1" start="4:21" type="bool"/>
+ <field name="CRC Read Enable" size="1" start="4:30" type="bool"/>
+ <field name="CRC Write Enable" size="1" start="4:31" type="bool"/>
+ <field name="Z Clear" size="32" start="5:0" type="float"/>
+ </struct>
+
+ <!-- Optional descriptor following the framebuffer header; it holds the CRC
+      buffer and the depth/stencil writeback and preload state (words 0..15).
+      Several fields deliberately overlay each other:
+        word 3, bits 4..5: "ZS Block Format" and "ZS Block Format v7"
+        words 4..11:       linear layout (ZS and S Writeback Base / strides)
+                           or AFBC layout (ZS AFBC Header / Body / sizes)
+        words 12..15:      ZS Preload Base / strides or CRC Clear Color
+      NOTE(review): "ZS Block Format v7" is declared 2 bits wide, but the
+      "Block Format v7" enum defines AFBC = 12 and AFBC Tiled = 13, which do
+      not fit in 2 bits (the Render Target equivalent is 4 bits wide). Confirm
+      the intended width and position against the hardware layout. -->
+ <struct name="ZS CRC Extension">
+ <field name="CRC Base" size="64" start="0:0" type="address"/>
+ <field name="CRC Row Stride" size="32" start="2:0" type="uint"/>
+ <field name="ZS Write Format" size="4" start="3:0" type="ZS Format"/>
+ <field name="ZS Block Format" size="2" start="3:4" type="Block Format"/>
+ <field name="ZS Block Format v7" size="2" start="3:4" type="Block Format v7"/>
+ <field name="ZS MSAA" size="2" start="3:6" default="Single" type="MSAA"/>
+ <field name="ZS Big Endian" size="1" start="3:8" type="bool"/>
+ <field name="ZS Clean Pixel Write Enable" size="1" start="3:10" type="bool"/>
+ <field name="CRC Render Target" size="4" start="3:11" type="uint"/>
+ <field name="S Write Format" size="4" start="3:16" type="S Format"/>
+ <field name="S Block Format" size="2" start="3:20" type="Block Format"/>
+ <field name="S MSAA" size="2" start="3:22" default="Single" type="MSAA"/>
+ <field name="ZS Preload Format" size="4" start="3:28" type="ZS Preload Format"/>
+ <field name="ZS Writeback Base" size="64" start="4:0" type="address"/>
+ <field name="ZS Writeback Row Stride" size="32" start="6:0" type="uint"/>
+ <field name="ZS Writeback Surface Stride" size="32" start="7:0" type="uint"/>
+ <field name="S Writeback Base" size="64" start="8:0" type="address"/>
+ <field name="S Writeback Row Stride" size="32" start="10:0" type="uint"/>
+ <field name="S Writeback Surface Stride" size="32" start="11:0" type="uint"/>
+ <field name="ZS AFBC Header" size="64" start="4:0" type="address"/>
+ <field name="ZS AFBC Row Stride" size="13" start="6:0" type="uint"/>
+ <field name="ZS AFBC Chunk Size" size="12" start="7:0" type="uint"/>
+ <field name="ZS AFBC Sparse" size="1" start="7:16" type="bool"/>
+ <field name="ZS AFBC Body" size="64" start="8:0" type="address"/>
+ <field name="ZS AFBC Body Size" size="32" start="10:0" type="uint"/>
+ <field name="ZS Preload Base" size="64" start="12:0" type="address"/>
+ <field name="ZS Preload Row Stride" size="32" start="14:0" type="uint"/>
+ <field name="ZS Preload Surface Stride" size="32" start="15:0" type="uint"/>
+ <field name="CRC Clear Color" size="64" start="12:0" type="uint"/>
+ </struct>
+
+ <!-- Type of the 2-bit "Writeback Endianness" and "Preload Endianness" fields
+      of the Render Target descriptor. -->
+ <enum name="RT Endianness">
+ <value name="Little Endian" value="0"/>
+ <value name="Big Endian 2B" value="1"/>
+ <value name="Big Endian 4B" value="2"/>
+ <value name="Big Endian 8B" value="3"/>
+ </enum>
+
+ <!-- Type of the 1-bit "YUV Conv K6" field of the Render Target descriptor.
+      NOTE(review): the value names look like the constant being selected
+      (0 or 16); confirm. -->
+ <enum name="YUV Conv K6">
+ <value name="0" value="0"/>
+ <value name="16" value="1"/>
+ </enum>
+
+ <!-- Type of the 2-bit "YUV Conv K7 Clamp" field of the Render Target
+      descriptor; each value names a numeric range. -->
+ <enum name="YUV Conv K7 Clamp">
+ <value name="MINUS_128_TO_127" value="0"/>
+ <value name="MINUS_112_TO_111" value="1"/>
+ <value name="0_TO_255" value="2"/>
+ <value name="16_TO_239" value="3"/>
+ </enum>
+
+ <!-- Type of the 1-bit "YUV Conv K8" field of the Render Target descriptor.
+      NOTE(review): the value names look like the constant being selected
+      (220 or 256); confirm. -->
+ <enum name="YUV Conv K8">
+ <value name="220" value="0"/>
+ <value name="256" value="1"/>
+ </enum>
+
+ <!-- Type of the 3-bit "YUV Swizzle" field of the Render Target descriptor;
+      all eight encodings are assigned. -->
+ <enum name="YUV Swizzle">
+ <value name="YUVA" value="0"/>
+ <value name="YVUA" value="1"/>
+ <value name="UYVA" value="2"/>
+ <value name="UVYA" value="3"/>
+ <value name="VUYA" value="4"/>
+ <value name="VYUA" value="5"/>
+ <value name="Y00A" value="6"/>
+ <value name="YXXA" value="7"/>
+ </enum>
+
+ <!-- Type of the 4-bit "YUV Conversion Mode" field of the Render Target
+      descriptor. Only 0, 3, 4 and 6 are assigned. -->
+ <enum name="YUV Conversion Mode">
+ <value name="No Conversion" value="0"/>
+ <value name="BT 601" value="3"/>
+ <value name="BT 709" value="4"/>
+ <value name="BT 2020" value="6"/>
+ </enum>
+
+ <!-- Type of the 3-bit "YUV Cr Siting" field of the Render Target descriptor.
+      Values 6 and 7 are not assigned. -->
+ <enum name="YUV Cr Siting">
+ <value name="Co-Sited" value="0"/>
+ <value name="Center Y" value="1"/>
+ <value name="Center X" value="2"/>
+ <value name="Center" value="3"/>
+ <value name="One Quarter" value="4"/>
+ <value name="Three Quarters" value="5"/>
+ </enum>
+
+ <!-- Per-render-target descriptor, sixteen 32-bit words (0..15). Several
+      groups of fields deliberately overlay each other, so only one group of
+      each set is meaningful for a given configuration:
+        word 1, bits 8..11: "Writeback Endianness" + "Writeback Block Format"
+                            or the 4-bit "Writeback Block Format v7"
+        word 2, bits 16..31: two alternative YUV layouts (Swizzle / Full Range /
+                            Conversion Mode / Cr Siting / Unsigned Cr Range, or
+                            Conv K5 / K6 / K7 Clamp / K8 / Conv Disable)
+        words 4..11:        YUV plane bases and strides, or AFBC header/body,
+                            or linear Writeback Base and strides
+        words 12..15:       Preload Base and strides, or Clear Color 0..3
+      "Internal Buffer Offset" carries the shr(4) modifier. -->
+ <struct name="Render Target">
+ <field name="Internal Buffer Offset" size="12" start="0:4" type="uint" modifier="shr(4)"/>
+ <field name="YUV Enable" size="1" start="0:24" type="bool"/>
+ <field name="Dithered Clear" size="1" start="0:25" type="bool"/>
+ <field name="Internal Format" size="6" start="0:26" type="Color Buffer Internal Format"/>
+ <field name="Write Enable" size="1" start="1:0" type="bool"/>
+ <field name="Writeback Format" size="5" start="1:3" type="MFBD Color Format"/>
+ <field name="Writeback Endianness" size="2" start="1:8" type="RT Endianness"/>
+ <field name="Writeback Block Format" size="2" start="1:10" type="Block Format"/>
+ <field name="Writeback Block Format v7" size="4" start="1:8" type="Block Format v7"/>
+ <field name="Writeback MSAA" size="2" start="1:12" type="MSAA"/>
+ <field name="sRGB" size="1" start="1:14" type="bool"/>
+ <field name="Dithering Enable" size="1" start="1:15" type="bool"/>
+ <field name="Swizzle" size="12" start="1:16" type="uint"/>
+ <field name="Writeback Sampling Mode" size="2" start="1:29" type="Downsampling Accumulation Mode"/>
+ <field name="Clean Pixel Write Enable" size="1" start="1:31" type="bool"/>
+ <field name="Preload Enable" size="1" start="2:0" type="bool"/>
+ <field name="Unload Enable" size="1" start="2:1" type="bool"/>
+ <field name="Preload Format" size="5" start="2:3" type="MFBD Color Format"/>
+ <field name="Preload Endianness" size="2" start="2:8" type="RT Endianness"/>
+ <field name="Preload Block Format" size="4" start="2:10" type="Block Format"/>
+ <field name="Preload MSAA" size="2" start="2:14" type="MSAA"/>
+ <field name="YUV Conv K5" size="8" start="2:16" type="uint"/>
+ <field name="YUV Swizzle" size="3" start="2:16" type="YUV Swizzle"/>
+ <field name="YUV Full Range" size="1" start="2:20" type="bool"/>
+ <field name="YUV Conversion Mode" size="4" start="2:21" type="YUV Conversion Mode"/>
+ <field name="YUV Cr Siting" size="3" start="2:25" type="YUV Cr Siting"/>
+ <field name="YUV Unsigned Cr Range" size="1" start="2:28" type="bool"/>
+ <field name="YUV Conv K6" size="1" start="2:24" type="YUV Conv K6"/>
+ <field name="YUV Conv K7 Clamp" size="2" start="2:25" type="YUV Conv K7 Clamp"/>
+ <field name="YUV Conv K8" size="1" start="2:27" type="YUV Conv K8"/>
+ <field name="YUV Conv Disable" size="1" start="2:31" type="bool"/>
+ <field name="YUV Conv K1" size="8" start="3:0" type="uint"/>
+ <field name="YUV Conv K2" size="8" start="3:8" type="uint"/>
+ <field name="YUV Conv K3" size="8" start="3:16" type="uint"/>
+ <field name="YUV Conv K4" size="8" start="3:24" type="uint"/>
+ <field name="YUV Plane 0 Base" size="64" start="4:0" type="address"/>
+ <field name="YUV Plane 1 Base" size="64" start="6:0" type="address"/>
+ <field name="YUV Plane 2 Base" size="64" start="8:0" type="address"/>
+ <field name="YUV Plane 0 Stride" size="32" start="10:0" type="uint"/>
+ <field name="YUV Plane 1 2 Stride" size="32" start="11:0" type="uint"/>
+ <field name="AFBC Header" size="64" start="4:0" type="address"/>
+ <field name="AFBC Row Stride" size="13" start="6:0" type="uint"/>
+ <field name="AFBC Chunk Size" size="12" start="7:0" type="uint"/>
+ <field name="AFBC Sparse" size="1" start="7:16" type="bool"/>
+ <field name="AFBC YUV Transform Enable" size="1" start="7:17" type="bool"/>
+ <field name="AFBC Split Block Enable" size="1" start="7:18" type="bool"/>
+ <field name="AFBC Wide Block Enable" size="1" start="7:19" type="bool"/>
+ <field name="AFBC Body" size="64" start="8:0" type="address"/>
+ <field name="AFBC Body Size" size="32" start="10:0" type="uint"/>
+ <field name="Writeback Base" size="64" start="8:0" type="address"/>
+ <field name="Writeback Row Stride" size="32" start="10:0" type="uint"/>
+ <field name="Writeback Surface Stride" size="32" start="11:0" type="uint"/>
+ <field name="Preload Base" size="64" start="12:0" type="address"/>
+ <field name="Preload Row Stride" size="32" start="14:0" type="uint"/>
+ <field name="Preload Surface Stride" size="32" start="15:0" type="uint"/>
+ <field name="Clear Color 0" size="32" start="12:0" type="uint"/>
+ <field name="Clear Color 1" size="32" start="13:0" type="uint"/>
+ <field name="Clear Color 2" size="32" start="14:0" type="uint"/>
+ <field name="Clear Color 3" size="32" start="15:0" type="uint"/>
+ </struct>
+
+ <!-- Type of the 3-bit "Pre Frame 0", "Pre Frame 1" and "Post Frame" fields of
+      the Bifrost Framebuffer Parameters. Only values 0..2 are assigned. -->
+ <enum name="Pre Post Frame Shader Mode">
+ <value name="Never" value="0"/>
+ <value name="Always" value="1"/>
+ <value name="Intersect" value="2"/>
+ </enum>
+
+ <!-- Bifrost-only view of the start of the framebuffer descriptor: frame
+      shader modes in the low 9 bits of word 0, then two 64-bit addresses in
+      words 4..5 and 6..7. Words 1..3 are not described. In the
+      "Multi-Target Framebuffer" aggregate this section sits at offset 0, the
+      same offset as "Local Storage". -->
+ <struct name="Bifrost Framebuffer Parameters">
+ <field name="Pre Frame 0" size="3" start="0:0" type="Pre Post Frame Shader Mode"/>
+ <field name="Pre Frame 1" size="3" start="0:3" type="Pre Post Frame Shader Mode"/>
+ <field name="Post Frame" size="3" start="0:6" type="Pre Post Frame Shader Mode"/>
+ <field name="Sample Locations" size="64" start="4:0" type="address"/>
+ <field name="Frame Shader DCDs" size="64" start="6:0" type="address"/>
+ </struct>
+
<struct name="Bifrost Tiler Heap">
<field name="Size" size="32" start="1:0" type="uint" modifier="align(4096)"/>
<field name="Base" size="64" start="2:0" type="address"/>
<field name="Heap" size="64" start="6:0" type="address"/>
<field name="Weights" size="32" start="8:0" type="Bifrost Tiler Weights" elements="8"/>
</struct>
+
+ <!-- A single 64-bit address. In the "Multi-Target Framebuffer" aggregate it
+      is placed at offset 56, the same offset as the "Midgard Tiler" section.
+      NOTE(review): presumably the address of a separate Bifrost tiler
+      descriptor; confirm against the driver. -->
+ <struct name="Bifrost Tiler Pointer">
+ <field name="Address" size="64" start="0:0" type="address"/>
+ </struct>
+
+ <!-- Field-less struct with an explicit size of 16; the "Multi-Target
+      Framebuffer" aggregate places it at offset 64, right after the 8-byte
+      "Bifrost Tiler Pointer" at offset 56. -->
+ <struct name="Bifrost Framebuffer Padding" size="16">
+ </struct>
+
+ <!-- Layout of the multi-target framebuffer descriptor header. Offsets are in
+      bytes: "Parameters" (six 32-bit words) starts at 32 and ends at 56,
+      where the tiler sections begin. Sections sharing an offset are
+      alternative views of the same bytes:
+        offset 0:  "Local Storage" or "Bifrost Parameters"
+        offset 56: "Tiler" (Midgard) or "Bifrost Tiler Pointer"
+      The Midgard weights at offset 96 likewise overlap the region that
+      starts with "Bifrost Padding" at offset 64 only if the padding is
+      extended; as declared (size 16) the padding covers bytes 64..79. -->
+ <aggregate name="Multi-Target Framebuffer">
+ <section name="Local Storage" offset="0" type="Local Storage"/>
+ <section name="Bifrost Parameters" offset="0" type="Bifrost Framebuffer Parameters"/>
+ <section name="Parameters" offset="32" type="Multi-Target Framebuffer Parameters"/>
+ <section name="Tiler" offset="56" type="Midgard Tiler"/>
+ <section name="Tiler Weights" offset="96" type="Midgard Tiler Weights"/>
+ <section name="Bifrost Tiler Pointer" offset="56" type="Bifrost Tiler Pointer"/>
+ <section name="Bifrost Padding" offset="64" type="Bifrost Framebuffer Padding"/>
+ </aggregate>
</panxml>
return format;
}
}
+
+/* Translate a gallium depth (or depth/stencil) format into the internal depth
+ * format stored in the framebuffer descriptor. Only the depth component
+ * selects the result: a format with and without stencil/padding bits maps to
+ * the same value. Any other format is a caller bug and hits unreachable(). */
+enum mali_z_internal_format
+panfrost_get_z_internal_format(enum pipe_format fmt)
+{
+        /* 16-bit normalized depth */
+        if (fmt == PIPE_FORMAT_Z16_UNORM ||
+            fmt == PIPE_FORMAT_Z16_UNORM_S8_UINT)
+                return MALI_Z_INTERNAL_FORMAT_D16;
+
+        /* 24-bit normalized depth */
+        if (fmt == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+            fmt == PIPE_FORMAT_Z24X8_UNORM)
+                return MALI_Z_INTERNAL_FORMAT_D24;
+
+        /* 32-bit float depth */
+        if (fmt == PIPE_FORMAT_Z32_FLOAT ||
+            fmt == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+                return MALI_Z_INTERNAL_FORMAT_D32;
+
+        unreachable("Unsupported depth/stencil format.");
+}
bool
panfrost_is_z24s8_variant(enum pipe_format fmt);
+/* Returns the internal depth format (D16, D24 or D32) matching the depth
+ * component of a depth/stencil pipe_format. */
+enum mali_z_internal_format
+panfrost_get_z_internal_format(enum pipe_format fmt);
+
unsigned
panfrost_translate_swizzle_4(const unsigned char swizzle[4]);