From 1b9d69410ce0708f526f5e846e369b781897d10f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 10 Oct 2018 15:59:29 -0400 Subject: [PATCH] freedreno/a6xx: texture state obj Unfortunately gallium doesn't match what the hw wants perfectly here, in using a separate CSO for each texture/sampler. So we have to use a hash table to map the collection of texture/samplers to hw state object. We probably could use separate hw state objects for texture and sampler state, but mesa/st tends to update the tex and samp state together. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a6xx/fd6_context.c | 2 + src/gallium/drivers/freedreno/a6xx/fd6_context.h | 3 + src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 66 ++++----- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 6 + src/gallium/drivers/freedreno/a6xx/fd6_texture.c | 169 +++++++++++++++++++++++ src/gallium/drivers/freedreno/a6xx/fd6_texture.h | 38 ++++- 6 files changed, 251 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index b82889c..ab10ccb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -56,6 +56,8 @@ fd6_context_destroy(struct pipe_context *pctx) fd_context_cleanup_common_vbos(&fd6_ctx->base); + fd6_texture_fini(pctx); + free(fd6_ctx); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 30cc260..85245c8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -105,6 +105,9 @@ struct fd6_context { /*{*/ struct fd6_streamout_state tf; /*}*/ + + uint16_t tex_seqno; + struct hash_table *tex_cache; }; static inline struct fd6_context * diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 93f6a26..eb24fb9 100644 --- 
a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -325,32 +325,32 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) u_upload_unmap(fd6_ctx->border_color_uploader); } -static bool -emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum a6xx_state_block sb, struct fd_texture_stateobj *tex) +bool +fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, + enum a6xx_state_block sb, struct fd_texture_stateobj *tex, + unsigned bcolor_offset) { bool needs_border = false; - unsigned bcolor_offset; - unsigned opcode, tex_samp_reg, tex_const_reg; + unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg; switch (sb) { case SB6_VS_TEX: opcode = CP_LOAD_STATE6_GEOM; - bcolor_offset = 0; tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO; tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT; break; case SB6_FS_TEX: opcode = CP_LOAD_STATE6_FRAG; - bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers; tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO; tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT; break; case SB6_CS_TEX: opcode = CP_LOAD_STATE6_FRAG; - bcolor_offset = 0; tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO; tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO; + tex_count_reg = 0; //REG_A6XX_SP_CS_TEX_COUNT; break; default: unreachable("bad state block"); @@ -359,8 +359,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, if (tex->num_samplers > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_flags(ctx->pipe, tex->num_samplers * 4 * 4, - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4, + FD_RINGBUFFER_OBJECT); for (unsigned i = 0; i < tex->num_samplers; i++) { static const struct fd6_sampler_stateobj dummy_sampler = {}; const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ? 
@@ -390,8 +390,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, if (tex->num_textures > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_flags(ctx->pipe, tex->num_textures * 16 * 4, - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4, + FD_RINGBUFFER_OBJECT); for (unsigned i = 0; i < tex->num_textures; i++) { static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? @@ -445,6 +445,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_ringbuffer_del(state); } + if (tex_count_reg) { + OUT_PKT4(ring, tex_count_reg, 1); + OUT_RING(ring, tex->num_textures); + } + return needs_border; } @@ -931,28 +936,25 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } - if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) { - needs_border |= emit_textures(ctx, ring, SB6_VS_TEX, - &ctx->tex[PIPE_SHADER_VERTEX]); - OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1); - OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures); - } + if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) && + ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) { + struct fd6_texture_state *tex = fd6_texture_state(ctx, + SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]); + + needs_border |= tex->needs_border; - if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) { - needs_border |= emit_textures(ctx, ring, SB6_FS_TEX, - &ctx->tex[PIPE_SHADER_FRAGMENT]); - OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1); - OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7); } -#if 0 - OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1); - OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ? 
- ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) && + ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) { + struct fd6_texture_state *tex = fd6_texture_state(ctx, + SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]); - OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1); - OUT_RING(ring, 0); -#endif + needs_border |= tex->needs_border; + + fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7); + } if (needs_border) emit_border_color(ctx, ring); @@ -988,8 +990,8 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_SHADER_TEX) { bool needs_border = false; - needs_border |= emit_textures(ctx, ring, SB6_CS_TEX, - &ctx->tex[PIPE_SHADER_COMPUTE]); + needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX, + &ctx->tex[PIPE_SHADER_COMPUTE], 0); if (needs_border) emit_border_color(ctx, ring); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 4e27597..0059527 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -45,6 +45,8 @@ struct fd_ringbuffer; enum fd6_state_id { FD6_GROUP_VS_CONST, FD6_GROUP_FS_CONST, + FD6_GROUP_VS_TEX, + FD6_GROUP_FS_TEX, }; struct fd6_state_group { @@ -174,6 +176,10 @@ fd6_stage2shadersb(enum shader_t type) } } +bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, + enum a6xx_state_block sb, struct fd_texture_stateobj *tex, + unsigned bcolor_offset); + void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit); void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c index 0f342ae..a48c4ee 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -30,9 +30,13 @@ #include "util/u_memory.h" 
#include "util/u_inlines.h" #include "util/u_format.h" +#include "util/hash_table.h" #include "fd6_texture.h" #include "fd6_format.h" +#include "fd6_emit.h" + +static void fd6_texture_state_destroy(struct fd6_texture_state *state); static enum a6xx_tex_clamp tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border) @@ -94,6 +98,7 @@ fd6_sampler_state_create(struct pipe_context *pctx, return NULL; so->base = *cso; + so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno; if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) miplinear = true; @@ -141,6 +146,28 @@ fd6_sampler_state_create(struct pipe_context *pctx, } static void +fd6_sampler_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + struct fd6_sampler_stateobj *samp = hwcso; + + struct hash_entry *entry; + hash_table_foreach(fd6_ctx->tex_cache, entry) { + struct fd6_texture_state *state = entry->data; + + for (unsigned i = 0; i < ARRAY_SIZE(state->key.samp); i++) { + if (samp->seqno == state->key.samp[i].seqno) { + fd6_texture_state_destroy(entry->data); + _mesa_hash_table_remove(fd6_ctx->tex_cache, entry); + break; + } + } + } + + free(hwcso); +} + +static void fd6_sampler_states_bind(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned nr, void **hwcso) @@ -215,6 +242,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->base.texture = prsc; so->base.reference.count = 1; so->base.context = pctx; + so->seqno = ++fd6_context(fd_context(pctx))->tex_seqno; so->texconst0 = A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) | @@ -310,6 +338,31 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, } static void +fd6_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *_view) +{ + struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + struct fd6_pipe_sampler_view *view = fd6_pipe_sampler_view(_view); + + struct hash_entry 
*entry; + hash_table_foreach(fd6_ctx->tex_cache, entry) { + struct fd6_texture_state *state = entry->data; + + for (unsigned i = 0; i < ARRAY_SIZE(state->key.view); i++) { + if (view->seqno == state->key.view[i].seqno) { + fd6_texture_state_destroy(entry->data); + _mesa_hash_table_remove(fd6_ctx->tex_cache, entry); + break; + } + } + } + + pipe_resource_reference(&view->base.texture, NULL); + + free(view); +} + +static void fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned nr, struct pipe_sampler_view **views) @@ -337,11 +390,127 @@ fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, } } + +static uint32_t +key_hash(const void *_key) +{ + const struct fd6_texture_key *key = _key; + uint32_t hash = _mesa_fnv32_1a_offset_bias; + hash = _mesa_fnv32_1a_accumulate_block(hash, key, sizeof(*key)); + return hash; +} + +static bool +key_equals(const void *_a, const void *_b) +{ + const struct fd6_texture_key *a = _a; + const struct fd6_texture_key *b = _b; + return memcmp(a, b, sizeof(struct fd6_texture_key)) == 0; +} + +struct fd6_texture_state * +fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb, + struct fd_texture_stateobj *tex) +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd6_texture_key key; + bool needs_border = false; + + memset(&key, 0, sizeof(key)); + + for (unsigned i = 0; i < tex->num_textures; i++) { + if (!tex->textures[i]) + continue; + + struct fd6_pipe_sampler_view *view = + fd6_pipe_sampler_view(tex->textures[i]); + + key.view[i].rsc_seqno = fd_resource(view->base.texture)->seqno; + key.view[i].seqno = view->seqno; + } + + for (unsigned i = 0; i < tex->num_samplers; i++) { + if (!tex->samplers[i]) + continue; + + struct fd6_sampler_stateobj *sampler = + fd6_sampler_stateobj(tex->samplers[i]); + + key.samp[i].seqno = sampler->seqno; + + needs_border |= sampler->needs_border; + } + + /* This will need update for HS/DS/GS: */ + if 
(unlikely(needs_border && (sb == SB6_FS_TEX))) { + /* TODO we could probably use fixed offsets for each shader + * stage and avoid the need for # of VS samplers to be part + * of the FS tex state.. but I don't think our handling of + * BCOLOR_OFFSET is actually correct, and trying to use a + * hard coded offset of 16 breaks things. + * + * Note that when this changes, then a corresponding change + * in emit_border_color() is also needed. + */ + key.bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers; + } + + uint32_t hash = key_hash(&key); + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(fd6_ctx->tex_cache, hash, &key); + + if (entry) { + return entry->data; + } + + struct fd6_texture_state *state = CALLOC_STRUCT(fd6_texture_state); + + state->key = key; + state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); + state->needs_border = needs_border; + + fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset); + + /* NOTE: uses copy of key in state obj, because pointer passed by caller + * is probably on the stack + */ + _mesa_hash_table_insert_pre_hashed(fd6_ctx->tex_cache, hash, + &state->key, state); + + return state; +} + +static void +fd6_texture_state_destroy(struct fd6_texture_state *state) +{ + fd_ringbuffer_del(state->stateobj); + free(state); +} + void fd6_texture_init(struct pipe_context *pctx) { + struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + pctx->create_sampler_state = fd6_sampler_state_create; + pctx->delete_sampler_state = fd6_sampler_state_delete; pctx->bind_sampler_states = fd6_sampler_states_bind; + pctx->create_sampler_view = fd6_sampler_view_create; + pctx->sampler_view_destroy = fd6_sampler_view_destroy; pctx->set_sampler_views = fd6_set_sampler_views; + + fd6_ctx->tex_cache = _mesa_hash_table_create(NULL, key_hash, key_equals); +} + +void +fd6_texture_fini(struct pipe_context *pctx) +{ + struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + + struct hash_entry *entry; + 
hash_table_foreach(fd6_ctx->tex_cache, entry) { + fd6_texture_state_destroy(entry->data); + } + ralloc_free(fd6_ctx->tex_cache); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h index a45ed6b..576afaa 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h @@ -41,6 +41,7 @@ struct fd6_sampler_stateobj { uint32_t texsamp0, texsamp1, texsamp2, texsamp3; bool saturate_s, saturate_t, saturate_r; bool needs_border; + uint16_t seqno; }; static inline struct fd6_sampler_stateobj * @@ -55,6 +56,7 @@ struct fd6_pipe_sampler_view { uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11; uint32_t offset; bool astc_srgb; + uint16_t seqno; }; static inline struct fd6_pipe_sampler_view * @@ -64,7 +66,7 @@ fd6_pipe_sampler_view(struct pipe_sampler_view *pview) } void fd6_texture_init(struct pipe_context *pctx); - +void fd6_texture_fini(struct pipe_context *pctx); static inline enum a6xx_tex_type fd6_tex_type(unsigned target) @@ -88,4 +90,38 @@ fd6_tex_type(unsigned target) } } +/* + * Texture stateobj: + * + * The sampler and sampler-view state is mapped to a single hardware + * stateobj which can be emit'd as a pointer in a CP_SET_DRAW_STATE + * packet, to avoid the overhead of re-generating the entire cmdstream + * when application toggles thru multiple different texture states. + */ + +struct fd6_texture_key { + struct { + /* We need to track the seqno of the rsc as well as of the + * sampler view, because resource shadowing/etc can result + * in the underlying bo changing (which means the previous + * state is no longer valid).
+ */ + uint16_t rsc_seqno; + uint16_t seqno; + } view[16]; + struct { + uint16_t seqno; + } samp[16]; + uint8_t bcolor_offset; +}; + +struct fd6_texture_state { + struct fd6_texture_key key; + struct fd_ringbuffer *stateobj; + bool needs_border; +}; + +struct fd6_texture_state * fd6_texture_state(struct fd_context *ctx, + enum a6xx_state_block sb, struct fd_texture_stateobj *tex); + #endif /* FD6_TEXTURE_H_ */ -- 2.7.4