From c14bb072301f68b68dcc9fff3e49210cb0819912 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 15 May 2015 21:25:36 -0700 Subject: [PATCH] i965: Add Gen9 surface state decoding Gen9 surface state is very similar to the previous generation. The important changes here are aux mode, and the way clear colors work. NOTE: There are some things intentionally left out of this decoding. v2: Redo the string for the aux buffer type to address compressed variants. v3: Use the shift for compression enable (instead of compression mode) (Topi) Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_state.h | 13 +++--- src/mesa/drivers/dri/i965/brw_state_batch.c | 20 +++++---- src/mesa/drivers/dri/i965/brw_state_dump.c | 56 +++++++++++++++++--------- src/mesa/drivers/dri/i965/gen8_surface_state.c | 15 +++---- 6 files changed, 68 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2dcc23c..abc11f6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1466,6 +1466,7 @@ struct brw_context uint32_t offset; uint32_t size; enum aub_state_struct_type type; + int index; } *state_batch_list; int state_batch_count; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8fd5a49..dedc381 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -608,6 +608,8 @@ #define GEN8_SURFACE_AUX_MODE_HIZ 3 /* Surface state DW7 */ +#define GEN9_SURFACE_RT_COMPRESSION_SHIFT 30 +#define GEN9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) #define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index bc79fb6..987672f 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -229,11 +229,14 @@ void brw_destroy_caches( struct brw_context *brw ); #define BRW_BATCH_STRUCT(brw, s) \ intel_batchbuffer_data(brw, (s), sizeof(*(s)), RENDER_RING) -void *brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset); +void *__brw_state_batch(struct brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset); +#define brw_state_batch(brw, type, size, alignment, out_offset) \ + __brw_state_batch(brw, type, size, alignment, 0, out_offset) /* brw_wm_surface_state.c */ void gen4_init_vtable_surface_functions(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 45dca69..a405a80 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -38,7 +38,8 @@ static void brw_track_state_batch(struct brw_context *brw, enum aub_state_struct_type type, uint32_t offset, - int size) + int size, + int index) { struct intel_batchbuffer *batch = &brw->batch; @@ -53,6 +54,7 @@ brw_track_state_batch(struct brw_context *brw, brw->state_batch_list[brw->state_batch_count].offset = offset; brw->state_batch_list[brw->state_batch_count].size = size; brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_list[brw->state_batch_count].index = index; brw->state_batch_count++; } @@ -108,18 +110,20 @@ brw_annotate_aub(struct brw_context *brw) * margin (4096 bytes, even if the object is just a 20-byte surface * state), and more buffers to walk and count for aperture size checking. * - * However, due to the restrictions inposed by the aperture size + * However, due to the restrictions imposed by the aperture size * checking performance hacks, we can't have the batch point at a * separate indirect state buffer, because once the batch points at * it, no more relocations can be added to it. So, we sneak these * buffers in at the top of the batchbuffer. */ void * -brw_state_batch(struct brw_context *brw, - enum aub_state_struct_type type, - int size, - int alignment, - uint32_t *out_offset) +__brw_state_batch(struct brw_context *brw, + enum aub_state_struct_type type, + int size, + int alignment, + int index, + uint32_t *out_offset) + { struct intel_batchbuffer *batch = &brw->batch; uint32_t offset; @@ -140,7 +144,7 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB))) - brw_track_state_batch(brw, type, offset, size); + brw_track_state_batch(brw, type, offset, size, index); *out_offset = offset; return batch->map + (offset>>2); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 155ef59..bce13d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -67,15 +67,6 @@ static const char *surface_tiling[] = { "Y-tiled" }; -static const char *surface_aux_mode[] = { - "AUX_NONE", - "AUX_MCS", - "AUX_APPEND", - "AUX_HIZ", - "RSVD", - "RSVD" -}; - static void batch_out(struct brw_context *brw, const char *name, uint32_t offset, int index, char *fmt, ...) PRINTFLIKE(5, 6); @@ -263,12 +254,30 @@ static float q_to_float(uint32_t data, int integer_end, int integer_start, return n * exp2(-(fractional_end - fractional_start + 1)); } -static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) +static void +dump_gen8_surface_state(struct brw_context *brw, uint32_t offset, int index) { - const char *name = "SURF"; uint32_t *surf = brw->batch.bo->virtual + offset; + int aux_mode = surf[6] & INTEL_MASK(2, 0); + const char *aux_str; + char *name; + + if (brw->gen >= 9 && (aux_mode == 1 || aux_mode == 5)) { + bool msrt = GET_BITS(surf[4], 5, 3) > 0; + bool compression = GET_FIELD(surf[7], GEN9_SURFACE_RT_COMPRESSION) == 1; + aux_str = ralloc_asprintf(NULL, "AUX_CCS_%c (%s, MULTISAMPLE_COUNT%c1)", + (aux_mode == 1) ? 'D' : 'E', + compression ? "Compressed RT" : "Uncompressed", + msrt ? '>' : '='); + } else { + static const char *surface_aux_mode[] = { "AUX_NONE", "AUX_MCS", + "AUX_APPEND", "AUX_HIZ", + "RSVD", "RSVD"}; + aux_str = ralloc_asprintf(NULL, "%s", surface_aux_mode[aux_mode]); + } - batch_out(brw, "SURF'", offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", + name = ralloc_asprintf(NULL, "SURF%03d", index); + batch_out(brw, name, offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), brw_surface_format_name(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)), (surf[0] & GEN7_SURFACE_IS_ARRAY) ? "array" : "", @@ -283,7 +292,7 @@ static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 2, "%dx%d [%s]\n", GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, - surface_aux_mode[surf[6] & INTEL_MASK(2, 0)]); + aux_str); batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n", GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1, (surf[3] & INTEL_MASK(17, 0)) + 1); @@ -298,14 +307,22 @@ static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) batch_out(brw, name, offset, 6, "AUX pitch: %d qpitch: %d\n", GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2, GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2); - batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", - GET_BITS(surf[7], 31, 31) ? 'R' : '-', - GET_BITS(surf[7], 30, 30) ? 'G' : '-', - GET_BITS(surf[7], 29, 29) ? 'B' : '-', - GET_BITS(surf[7], 28, 28) ? 'A' : '-'); + if (brw->gen >= 9) { + batch_out(brw, name, offset, 7, "Clear color: R(%x)G(%x)B(%x)A(%x)\n", + surf[12], surf[13], surf[14], surf[15]); + } else { + batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", + GET_BITS(surf[7], 31, 31) ? 'R' : '-', + GET_BITS(surf[7], 30, 30) ? 'G' : '-', + GET_BITS(surf[7], 29, 29) ? 'B' : '-', + GET_BITS(surf[7], 28, 28) ? 'A' : '-'); + } for (int i = 8; i < 12; i++) batch_out(brw, name, offset, i, "0x%08x\n", surf[i]); + + ralloc_free((void *)aux_str); + ralloc_free(name); } static void @@ -694,7 +711,8 @@ dump_state_batch(struct brw_context *brw) break; case AUB_TRACE_SURFACE_STATE: if (brw->gen >= 8) { - dump_gen8_surface_state(brw, offset); + dump_gen8_surface_state(brw, offset, + brw->state_batch_list[i].index); } else if (brw->gen >= 7) { dump_gen7_surface_state(brw, offset); } else { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index d0c2d80..672fc70 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -100,11 +100,11 @@ horizontal_alignment(const struct intel_mipmap_tree *mt) } static uint32_t * -allocate_surface_state(struct brw_context *brw, uint32_t *out_offset) +allocate_surface_state(struct brw_context *brw, uint32_t *out_offset, int index) { int dwords = brw->gen >= 9 ? 16 : 13; - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - dwords * 4, 64, out_offset); + uint32_t *surf = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + dwords * 4, 64, index, out_offset); memset(surf, 0, dwords * 4); return surf; } @@ -120,7 +120,7 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, bool rw) { const unsigned mocs = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | @@ -164,6 +164,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *aux_mt = NULL; uint32_t aux_mode = 0; uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; if (mt->format == MESA_FORMAT_S_UINT8) { @@ -179,7 +180,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - uint32_t *surf = allocate_surface_state(brw, surf_offset); + uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT | @@ -310,7 +311,7 @@ gen8_emit_null_surface_state(struct brw_context *brw, unsigned samples, uint32_t *out_offset) { - uint32_t *surf = allocate_surface_state(brw, out_offset); + uint32_t *surf = allocate_surface_state(brw, out_offset, -1); surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT | @@ -392,7 +393,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - uint32_t *surf = allocate_surface_state(brw, &offset); + uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) | (is_array ? GEN7_SURFACE_IS_ARRAY : 0) | -- 2.7.4