From aff93f54190f4c934e25b9210d59db22bdd38ec7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 22 Apr 2020 11:20:25 -0700 Subject: [PATCH] freedreno/a6xx: split out const emit In order to inline the const emit and drop the per-gen vfuncs to emit the correct sort of packet, we should consolidate all of the entry-points to const emit in one object file, otherwise we'll end up with multiple copies per gen. Signed-off-by: Rob Clark Part-of: --- src/gallium/drivers/freedreno/Makefile.sources | 2 + src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 3 +- src/gallium/drivers/freedreno/a6xx/fd6_const.c | 305 +++++++++++++++++++++++ src/gallium/drivers/freedreno/a6xx/fd6_const.h | 50 ++++ src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 241 +----------------- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 21 +- src/gallium/drivers/freedreno/meson.build | 2 + 7 files changed, 375 insertions(+), 249 deletions(-) create mode 100644 src/gallium/drivers/freedreno/a6xx/fd6_const.c create mode 100644 src/gallium/drivers/freedreno/a6xx/fd6_const.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 0268bb1..7644da5 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -171,6 +171,8 @@ a6xx_SOURCES := \ a6xx/fd6_blitter.h \ a6xx/fd6_compute.c \ a6xx/fd6_compute.h \ + a6xx/fd6_const.c \ + a6xx/fd6_const.h \ a6xx/fd6_context.c \ a6xx/fd6_context.h \ a6xx/fd6_draw.c \ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index e6d5b32..fb29e7d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -31,6 +31,7 @@ #include "freedreno_resource.h" #include "fd6_compute.h" +#include "fd6_const.h" #include "fd6_context.h" #include "fd6_emit.h" @@ -140,7 +141,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) 
cs_program_emit(ring, v); fd6_emit_cs_state(ctx, ring, v); - ir3_emit_cs_consts(v, ring, ctx, info); + fd6_emit_cs_consts(v, ring, ctx, info); foreach_bit(i, ctx->global_bindings.enabled_mask) nglobal++; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c new file mode 100644 index 0000000..e934266 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2016 Rob Clark + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "fd6_const.h" + +/* regid: base const register + * prsc or dwords: buffer containing constant values + * sizedwords: size of const value buffer + */ +void +fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc) +{ + uint32_t i, sz, align_sz; + enum a6xx_state_src src; + + debug_assert((regid % 4) == 0); + + if (prsc) { + sz = 0; + src = SS6_INDIRECT; + } else { + sz = sizedwords; + src = SS6_DIRECT; + } + + align_sz = align(sz, 4); + + OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(src) | + CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); + if (prsc) { + struct fd_bo *bo = fd_resource(prsc)->bo; + OUT_RELOC(ring, bo, offset, 0, 0); + } else { + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; + } + + for (i = 0; i < sz; i++) { + OUT_RING(ring, dwords[i]); + } + + /* Zero-pad to multiple of 4 dwords */ + for (i = sz; i < align_sz; i++) { + OUT_RING(ring, 0); + } +} + +void +fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, + uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) +{ + uint32_t anum = align(num, 2); + uint32_t i; + + debug_assert((regid % 4) == 0); + + OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum)); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(anum/2)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + for 
(i = 0; i < num; i++) { + if (prscs[i]) { + if (write) { + OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } else { + OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); + OUT_RING(ring, 0xbad00000 | (i << 16)); + } + } + + for (; i < anum; i++) { + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0xffffffff); + } +} + +static void +emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s) +{ + struct fd_context *ctx = emit->ctx; + const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4; + uint32_t dwords = 16; + + OUT_PKT7(ring, fd6_stage2opcode(s->type), 3); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) | + CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4)); + OUT_RB(ring, ctx->batch->tess_addrs_constobj); +} + +static void +emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, + uint32_t *params, int num_params) +{ + const unsigned regid = v->shader->const_state.offsets.primitive_param; + int size = MIN2(1 + regid, v->constlen) - regid; + if (size > 0) + fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL); +} + +static void +emit_tess_consts(struct fd6_emit *emit) +{ + struct fd_context *ctx = emit->ctx; + + struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( + ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); + + /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS + * size is dwords, since that's what LDG/STG use. + */ + unsigned num_vertices = + emit->hs ? 
+ emit->info->vertices_per_patch : + emit->gs->shader->nir->info.gs.vertices_in; + + uint32_t vs_params[4] = { + emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ + emit->vs->shader->output_size * 4, /* vs vertex stride */ + 0, + 0 + }; + + emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params)); + + if (emit->hs) { + uint32_t hs_params[4] = { + emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ + emit->vs->shader->output_size * 4, /* vs vertex stride */ + emit->hs->shader->output_size, + emit->info->vertices_per_patch + }; + + emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params)); + emit_tess_bos(constobj, emit, emit->hs); + + if (emit->gs) + num_vertices = emit->gs->shader->nir->info.gs.vertices_in; + + uint32_t ds_params[4] = { + emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */ + emit->ds->shader->output_size * 4, /* ds vertex stride */ + emit->hs->shader->output_size, /* hs vertex stride (dwords) */ + emit->hs->shader->nir->info.tess.tcs_vertices_out + }; + + emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params)); + emit_tess_bos(constobj, emit, emit->ds); + } + + if (emit->gs) { + struct ir3_shader_variant *prev; + if (emit->ds) + prev = emit->ds; + else + prev = emit->vs; + + uint32_t gs_params[4] = { + prev->shader->output_size * num_vertices * 4, /* ds primitive stride */ + prev->shader->output_size * 4, /* ds vertex stride */ + 0, + 0, + }; + + num_vertices = emit->gs->shader->nir->info.gs.vertices_in; + emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params)); + } + + fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL); +} + +static void +emit_user_consts(struct fd6_emit *emit) +{ + static const enum pipe_shader_type types[] = { + PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL, + PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT, + }; + const struct 
ir3_shader_variant *variants[] = { + emit->vs, emit->hs, emit->ds, emit->gs, emit->fs, + }; + struct fd_context *ctx = emit->ctx; + unsigned sz = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { + if (!variants[i]) + continue; + sz += variants[i]->shader->ubo_state.cmdstream_size; + } + + struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( + ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING); + + for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { + if (!variants[i]) + continue; + ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]); + ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]); + } + + fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL); +} + +void +fd6_emit_consts(struct fd6_emit *emit) +{ + struct fd_context *ctx = emit->ctx; + struct fd6_context *fd6_ctx = fd6_context(ctx); + + if (emit->dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG)) + emit_user_consts(emit); + + if (emit->key.key.has_gs || emit->key.key.tessellation) + emit_tess_consts(emit); + + /* if driver-params are needed, emit each time: */ + const struct ir3_shader_variant *vs = emit->vs; + if (ir3_needs_vs_driver_params(vs)) { + struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer( + ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING); + ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info); + fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL); + fd6_ctx->has_dp_state = true; + } else if (fd6_ctx->has_dp_state) { + fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL); + fd6_ctx->has_dp_state = false; + } +} + +void +fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v, + enum pipe_shader_type stage, struct fd_ringbuffer *ring) +{ + struct fd_context *ctx = emit->ctx; + + ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]); + ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]); +} + +void 
+fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info) +{ + ir3_emit_cs_consts(v, ring, ctx, info); +} + +void +fd6_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring) +{ + ir3_emit_immediates(screen, v, ring); +} + +void +fd6_user_consts_size(struct ir3_ubo_analysis_state *state, + unsigned *packets, unsigned *size) +{ + ir3_user_consts_size(state, packets, size); +} + +void +fd6_emit_link_map(struct fd_screen *screen, + const struct ir3_shader_variant *producer, + const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) +{ + ir3_emit_link_map(screen, producer, v, ring); +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.h b/src/gallium/drivers/freedreno/a6xx/fd6_const.h new file mode 100644 index 0000000..d53bf82 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2016 Rob Clark + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef FD6_CONST_H +#define FD6_CONST_H + +#include "fd6_emit.h" + +void fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc); +void fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, + uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets); + +void fd6_emit_consts(struct fd6_emit *emit); +void fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v, + enum pipe_shader_type stage, struct fd_ringbuffer *ring); +void fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info); +void fd6_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring); +void fd6_user_consts_size(struct ir3_ubo_analysis_state *state, + unsigned *packets, unsigned *size); +void fd6_emit_link_map(struct fd_screen *screen, + const struct ir3_shader_variant *producer, + const struct ir3_shader_variant *v, struct fd_ringbuffer *ring); + +#endif /* FD6_CONST_H */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 425efbb..3ec1c6c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -38,6 +38,7 @@ #include "fd6_emit.h" #include "fd6_blend.h" +#include "fd6_const.h" #include "fd6_context.h" #include "fd6_image.h" #include "fd6_program.h" @@ -46,92 +47,6 @@ #include "fd6_format.h" #include "fd6_zsa.h" -/* regid: base const register - * prsc or dwords: buffer containing 
constant values - * sizedwords: size of const value buffer - */ -static void -fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, - uint32_t regid, uint32_t offset, uint32_t sizedwords, - const uint32_t *dwords, struct pipe_resource *prsc) -{ - uint32_t i, sz, align_sz; - enum a6xx_state_src src; - - debug_assert((regid % 4) == 0); - - if (prsc) { - sz = 0; - src = SS6_INDIRECT; - } else { - sz = sizedwords; - src = SS6_DIRECT; - } - - align_sz = align(sz, 4); - - OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(src) | - CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | - CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); - if (prsc) { - struct fd_bo *bo = fd_resource(prsc)->bo; - OUT_RELOC(ring, bo, offset, 0, 0); - } else { - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; - } - - for (i = 0; i < sz; i++) { - OUT_RING(ring, dwords[i]); - } - - /* Zero-pad to multiple of 4 dwords */ - for (i = sz; i < align_sz; i++) { - OUT_RING(ring, 0); - } -} - -static void -fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, - uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) -{ - uint32_t anum = align(num, 2); - uint32_t i; - - debug_assert((regid % 4) == 0); - - OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum)); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | - CP_LOAD_STATE6_0_NUM_UNIT(anum/2)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - - for (i = 0; i < num; i++) { - if (prscs[i]) { - if (write) { - OUT_RELOCW(ring, 
fd_resource(prscs[i])->bo, offsets[i], 0, 0); - } else { - OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); - } - } else { - OUT_RING(ring, 0xbad00000 | (i << 16)); - OUT_RING(ring, 0xbad00000 | (i << 16)); - } - } - - for (; i < anum; i++) { - OUT_RING(ring, 0xffffffff); - OUT_RING(ring, 0xffffffff); - } -} - /* Border color layout is diff from a4xx/a5xx.. if it turns out to be * the same as a6xx then move this somewhere common ;-) * @@ -807,140 +722,10 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3 } } -static void -emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s) -{ - struct fd_context *ctx = emit->ctx; - const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4; - uint32_t dwords = 16; - - OUT_PKT7(ring, fd6_stage2opcode(s->type), 3); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) | - CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4)); - OUT_RB(ring, ctx->batch->tess_addrs_constobj); -} - -static void -emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v, - uint32_t *params, int num_params) -{ - const unsigned regid = v->shader->const_state.offsets.primitive_param; - int size = MIN2(1 + regid, v->constlen) - regid; - if (size > 0) - fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL); -} - -static void -fd6_emit_tess_const(struct fd6_emit *emit) -{ - struct fd_context *ctx = emit->ctx; - - struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( - ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); - - /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS - * size is dwords, since that's what LDG/STG use. - */ - unsigned num_vertices = - emit->hs ? 
- emit->info->vertices_per_patch : - emit->gs->shader->nir->info.gs.vertices_in; - - uint32_t vs_params[4] = { - emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ - emit->vs->shader->output_size * 4, /* vs vertex stride */ - 0, - 0 - }; - - emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params)); - - if (emit->hs) { - uint32_t hs_params[4] = { - emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */ - emit->vs->shader->output_size * 4, /* vs vertex stride */ - emit->hs->shader->output_size, - emit->info->vertices_per_patch - }; - - emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params)); - emit_tess_bos(constobj, emit, emit->hs); - - if (emit->gs) - num_vertices = emit->gs->shader->nir->info.gs.vertices_in; - - uint32_t ds_params[4] = { - emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */ - emit->ds->shader->output_size * 4, /* ds vertex stride */ - emit->hs->shader->output_size, /* hs vertex stride (dwords) */ - emit->hs->shader->nir->info.tess.tcs_vertices_out - }; - - emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params)); - emit_tess_bos(constobj, emit, emit->ds); - } - - if (emit->gs) { - struct ir3_shader_variant *prev; - if (emit->ds) - prev = emit->ds; - else - prev = emit->vs; - - uint32_t gs_params[4] = { - prev->shader->output_size * num_vertices * 4, /* ds primitive stride */ - prev->shader->output_size * 4, /* ds vertex stride */ - 0, - 0, - }; - - num_vertices = emit->gs->shader->nir->info.gs.vertices_in; - emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params)); - } - - fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL); -} - -static void -fd6_emit_consts(struct fd6_emit *emit) -{ - static const enum pipe_shader_type types[] = { - PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL, - PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT, - }; - const struct 
ir3_shader_variant *variants[] = { - emit->vs, emit->hs, emit->ds, emit->gs, emit->fs, - }; - struct fd_context *ctx = emit->ctx; - unsigned sz = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { - if (!variants[i]) - continue; - sz += variants[i]->shader->ubo_state.cmdstream_size; - } - - struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( - ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING); - - for (unsigned i = 0; i < ARRAY_SIZE(types); i++) { - if (!variants[i]) - continue; - ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]); - ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]); - } - - fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL); -} - void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) { struct fd_context *ctx = emit->ctx; - struct fd6_context *fd6_ctx = fd6_context(ctx); struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; const struct fd6_program_state *prog = fd6_emit_get_prog(emit); const struct ir3_shader_variant *vs = emit->vs; @@ -1088,24 +873,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_emit_take_group(emit, ring, FD6_GROUP_PROG_FB_RAST, ENABLE_DRAW); } - if (dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG)) { - fd6_emit_consts(emit); - } - - if (emit->key.key.has_gs || emit->key.key.tessellation) - fd6_emit_tess_const(emit); - - /* if driver-params are needed, emit each time: */ - if (ir3_needs_vs_driver_params(vs)) { - struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer( - ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING); - ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info); - fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL); - fd6_ctx->has_dp_state = true; - } else if (fd6_ctx->has_dp_state) { - fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL); - fd6_ctx->has_dp_state = false; - } + fd6_emit_consts(emit); struct 
ir3_stream_output_info *info = &fd6_last_shader(prog)->shader->stream_output; if (info->num_outputs) @@ -1177,10 +945,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); OUT_RING(obj, ir3_shader_nibo(fs)); - ir3_emit_ssbo_sizes(ctx->screen, fs, obj, - &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); - ir3_emit_image_dims(ctx->screen, fs, obj, - &ctx->shaderimg[PIPE_SHADER_FRAGMENT]); + fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring); fd6_emit_take_group(emit, obj, FD6_GROUP_IBO, ENABLE_DRAW); fd_ringbuffer_del(state); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index fa98aae..80eb0c8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -35,6 +35,7 @@ #include "freedreno_program.h" #include "fd6_program.h" +#include "fd6_const.h" #include "fd6_emit.h" #include "fd6_texture.h" #include "fd6_format.h" @@ -425,7 +426,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, vs); - ir3_emit_immediates(screen, vs, ring); + fd6_emit_immediates(screen, vs, ring); struct ir3_shader_linkage l = {0}; const struct ir3_shader_variant *last_shader = fd6_last_shader(state); @@ -510,8 +511,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, hs); - ir3_emit_immediates(screen, hs, ring); - ir3_emit_link_map(screen, vs, hs, ring); + fd6_emit_immediates(screen, hs, ring); + fd6_emit_link_map(screen, vs, hs, ring); OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1); OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) | @@ -520,8 +521,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, ds); - 
ir3_emit_immediates(screen, ds, ring); - ir3_emit_link_map(screen, hs, ds, ring); + fd6_emit_immediates(screen, ds, ring); + fd6_emit_link_map(screen, hs, ds, ring); shader_info *hs_info = &hs->shader->nir->info; OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1); @@ -701,11 +702,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, gs); - ir3_emit_immediates(screen, gs, ring); + fd6_emit_immediates(screen, gs, ring); if (ds) - ir3_emit_link_map(screen, ds, gs, ring); + fd6_emit_link_map(screen, ds, gs, ring); else - ir3_emit_link_map(screen, vs, gs, ring); + fd6_emit_link_map(screen, vs, gs, ring); OUT_PKT4(ring, REG_A6XX_VPC_PACK_GS, 1); OUT_RING(ring, A6XX_VPC_PACK_GS_POSITIONLOC(pos_loc) | @@ -818,7 +819,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); if (!binning_pass) - ir3_emit_immediates(screen, fs, ring); + fd6_emit_immediates(screen, fs, ring); } static struct fd_ringbuffer * @@ -1026,7 +1027,7 @@ fd6_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_stat unsigned packets, size; /* pre-calculate size required for userconst stateobj: */ - ir3_user_consts_size(&shader->ubo_state, &packets, &size); + fd6_user_consts_size(&shader->ubo_state, &packets, &size); /* also account for UBO addresses: */ packets += 1; diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 919baf0..86d4534 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -181,6 +181,8 @@ files_libfreedreno = files( 'a6xx/fd6_blitter.h', 'a6xx/fd6_compute.c', 'a6xx/fd6_compute.h', + 'a6xx/fd6_const.c', + 'a6xx/fd6_const.h', 'a6xx/fd6_context.c', 'a6xx/fd6_context.h', 'a6xx/fd6_draw.c', -- 2.7.4