From bd55533f5b00cdf76ec474d8c300fe8742ca11b6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 14 Jun 2020 11:36:05 -0700 Subject: [PATCH] freedreno/ir3: add accessor for const_state We are going to want to move this back to the variant, and come up with a different strategy for binning/nonbinning to share the same constant layout, in order to implement shader-cache support. (With a shader cache we can have a mix of dynamically compiled variants and cache hits, so there is no good place to serialize the const-state.) To reduce the churn as we re-arrange things, move direct access to the const-state to a helper function. This patch is the boring churny part. Signed-off-by: Rob Clark Part-of: --- src/freedreno/computerator/a6xx.c | 2 +- src/freedreno/ir3/ir3_a4xx.c | 2 +- src/freedreno/ir3/ir3_compiler_nir.c | 17 ++++++++++------- src/freedreno/ir3/ir3_cp.c | 2 +- src/freedreno/ir3/ir3_parser.y | 2 +- src/freedreno/ir3/ir3_shader.c | 2 +- src/freedreno/ir3/ir3_shader.h | 6 ++++++ src/freedreno/vulkan/tu_pipeline.c | 10 +++++----- src/gallium/drivers/freedreno/a6xx/fd6_const.c | 6 ++++-- src/gallium/drivers/freedreno/ir3/ir3_const.h | 18 +++++++++--------- 10 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 5cf491c..72b0f06 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -208,7 +208,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3 struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); struct ir3_shader_variant *v = ir3_kernel->v; - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t base = const_state->offsets.immediate; int size = const_state->immediates_count; diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 8dbc61c..968b1d7 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ 
b/src/freedreno/ir3/ir3_a4xx.c @@ -212,7 +212,7 @@ get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr, /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(ctx->so); unsigned cb = regid(const_state->offsets.image_dims, 0) + const_state->image_dims.off[index]; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 91ef00b..88de112 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -110,7 +110,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + struct ir3_const_state *const_state = ir3_const_state(ctx->so); unsigned n = const_state->offsets.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx->block, r); @@ -772,7 +772,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(ctx->so); unsigned ubo = regid(const_state->offsets.ubo, 0); const unsigned ptrsz = ir3_pointer_size(ctx->compiler); @@ -848,7 +848,7 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { /* SSBO size stored as a const starting at ssbo_sizes: */ - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(ctx->so); unsigned blk_idx = nir_src_as_uint(intr->src[0]); 
unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + const_state->ssbo_size.off[blk_idx]; @@ -1219,7 +1219,8 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr * bytes-per-pixel should have been emitted in 2nd slot of * image_dims. See ir3_shader::emit_image_dims(). */ - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + const struct ir3_const_state *const_state = + ir3_const_state(ctx->so); unsigned cb = regid(const_state->offsets.image_dims, 0) + const_state->image_dims.off[nir_src_as_uint(intr->src[0])]; struct ir3_instruction *aux = create_uniform(b, cb + 1); @@ -1435,8 +1436,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst = NULL; } - const unsigned primitive_param = ctx->so->shader->const_state.offsets.primitive_param * 4; - const unsigned primitive_map = ctx->so->shader->const_state.offsets.primitive_map * 4; + const struct ir3_const_state *const_state = ir3_const_state(ctx->so); + const unsigned primitive_param = const_state->offsets.primitive_param * 4; + const unsigned primitive_map = const_state->offsets.primitive_map * 4; switch (intr->intrinsic) { case nir_intrinsic_load_uniform: @@ -2805,7 +2807,8 @@ emit_stream_out(struct ir3_context *ctx) * stripped out in the backend. 
*/ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + const struct ir3_const_state *const_state = + ir3_const_state(ctx->so); unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index ed116a9..a9562af 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -197,7 +197,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, } /* Reallocate for 4 more elements whenever it's necessary */ - struct ir3_const_state *const_state = &ctx->so->shader->const_state; + struct ir3_const_state *const_state = ir3_const_state(ctx->so); if (const_state->immediate_idx == const_state->immediates_size * 4) { const_state->immediates_size += 4; const_state->immediates = realloc (const_state->immediates, diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 2b024d3..fe6cbe8 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -153,7 +153,7 @@ static struct ir3_register * dummy_dst(void) static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3) { - struct ir3_const_state *const_state = &variant->shader->const_state; + struct ir3_const_state *const_state = ir3_const_state(variant); assert((reg & 0x7) == 0); int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */ if (const_state->immediate_idx == const_state->immediates_size * 4) { diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index aa69946..a8f3dc7 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -494,7 +494,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) fprintf(out, "\n"); } - struct ir3_const_state *const_state = &so->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(so); for (i = 0; i < 
const_state->immediates_count; i++) { fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7e3e34a..f646f33 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -645,6 +645,12 @@ struct ir3_shader { struct ir3_shader_key key_mask; }; +static inline struct ir3_const_state * +ir3_const_state(const struct ir3_shader_variant *v) +{ + return &v->shader->const_state; +} + void * ir3_shader_assemble(struct ir3_shader_variant *v); struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key, bool binning_pass, bool *created); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index e3f62ff..1bd40f0 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -435,7 +435,7 @@ tu6_emit_xs_config(struct tu_cs *cs, /* emit immediates */ - const struct ir3_const_state *const_state = &xs->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(xs); uint32_t base = const_state->offsets.immediate; int size = const_state->immediates_count; @@ -653,7 +653,7 @@ static void tu6_emit_link_map(struct tu_cs *cs, const struct ir3_shader_variant *producer, const struct ir3_shader_variant *consumer) { - const struct ir3_const_state *const_state = &consumer->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(consumer); uint32_t base = const_state->offsets.primitive_map; uint32_t patch_locs[MAX_VARYING] = { }, num_loc; num_loc = ir3_link_geometry_stages(producer, consumer, patch_locs); @@ -1140,11 +1140,11 @@ tu6_emit_geometry_consts(struct tu_cs *cs, 0, 0, }; - uint32_t vs_base = vs->shader->const_state.offsets.primitive_param; + uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param; tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, 
SB6_VS_SHADER, 0, ARRAY_SIZE(params), params); - uint32_t gs_base = gs->shader->const_state.offsets.primitive_param; + uint32_t gs_base = ir3_const_state(gs)->offsets.primitive_param; tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0, ARRAY_SIZE(params), params); } @@ -1808,7 +1808,7 @@ tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, struct ir3_shader_variant *v) { link->ubo_state = v->shader->ubo_state; - link->const_state = v->shader->const_state; + link->const_state = *ir3_const_state(v); link->constlen = v->constlen; link->push_consts = shader->push_consts; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index d8ba060..5793199 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -134,7 +134,8 @@ static void emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s) { struct fd_context *ctx = emit->ctx; - const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4; + const struct ir3_const_state *const_state = ir3_const_state(s); + const unsigned regid = const_state->offsets.primitive_param * 4 + 4; uint32_t dwords = 16; OUT_PKT7(ring, fd6_stage2opcode(s->type), 3); @@ -150,7 +151,8 @@ static void emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, uint32_t *params, int num_params) { - const unsigned regid = v->shader->const_state.offsets.primitive_param; + const struct ir3_const_state *const_state = ir3_const_state(v); + const unsigned regid = const_state->offsets.primitive_param; int size = MIN2(1 + regid, v->constlen) - regid; if (size > 0) fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 235c603..92923d2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -134,7 +134,7 @@ static inline void ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.ubo; if (v->constlen > offset) { uint32_t params = const_state->num_ubos; @@ -177,7 +177,7 @@ static inline void ir3_emit_ssbo_sizes(struct fd_screen *screen, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) { - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.ssbo_sizes; if (v->constlen > offset) { uint32_t sizes[align(const_state->ssbo_size.count, 4)]; @@ -197,7 +197,7 @@ static inline void ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) { - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.image_dims; if (v->constlen > offset) { uint32_t dims[align(const_state->image_dims.count, 4)]; @@ -250,7 +250,7 @@ static inline void ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t base = const_state->offsets.immediate; int size = const_state->immediates_count; @@ -272,7 +272,7 @@ ir3_emit_link_map(struct fd_screen *screen, const struct ir3_shader_variant *producer, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = 
&v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t base = const_state->offsets.primitive_map; uint32_t patch_locs[MAX_VARYING] = { }, num_loc; @@ -299,7 +299,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { /* streamout addresses after driver-params: */ - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; @@ -424,7 +424,7 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin static inline bool ir3_needs_vs_driver_params(const struct ir3_shader_variant *v) { - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; return v->constlen > offset; @@ -437,7 +437,7 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v, { debug_assert(ir3_needs_vs_driver_params(v)); - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; uint32_t vertex_params[IR3_DP_VS_COUNT] = { [IR3_DP_VTXID_BASE] = info->index_size ? @@ -544,7 +544,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); /* emit compute-shader driver-params: */ - const struct ir3_const_state *const_state = &v->shader->const_state; + const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { ring_wfi(ctx->batch, ring); -- 2.7.4