From 5118dcf8c36043d346ba0b4b45e31dbea0012e40 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 4 Feb 2019 13:30:34 -0500 Subject: [PATCH] freedreno/a6xx: image/ssbo state emit Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 229 +++++++++++++---------- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 5 +- src/gallium/drivers/freedreno/a6xx/fd6_image.c | 227 +++++++++++----------- src/gallium/drivers/freedreno/a6xx/fd6_image.h | 8 +- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 2 + src/gallium/drivers/freedreno/a6xx/fd6_texture.c | 3 +- 6 files changed, 259 insertions(+), 215 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 1d20770..397c043 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -329,7 +329,10 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, enum a6xx_state_block sb, struct fd_texture_stateobj *tex, - unsigned bcolor_offset) + unsigned bcolor_offset, + /* can be NULL if no image/SSBO state to merge in: */ + const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf, + struct fd_shaderimg_stateobj *img) { bool needs_border = false; unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg; @@ -357,7 +360,6 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, unreachable("bad state block"); } - if (tex->num_samplers > 0) { struct fd_ringbuffer *state = fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4); @@ -388,10 +390,24 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, fd_ringbuffer_del(state); } - if (tex->num_textures > 0) { + unsigned num_merged_textures = tex->num_textures; + unsigned num_textures = tex->num_textures; + if (v) { + num_merged_textures += v->image_mapping.num_tex; + + /* There could be more bound textures than 
what the shader uses, + * which isn't known at shader compile time. So in the case we + * are merging tex state, only emit the textures that the shader + * uses (since the image/SSBO related tex state comes immediately + * after). + */ + num_textures = v->image_mapping.tex_base; + } + + if (num_merged_textures > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_object(pipe, tex->num_textures * 16 * 4); - for (unsigned i = 0; i < tex->num_textures; i++) { + fd_ringbuffer_new_object(pipe, num_merged_textures * 16 * 4); + for (unsigned i = 0; i < num_textures; i++) { static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view; @@ -424,13 +440,26 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, OUT_RING(state, 0); } + if (v) { + const struct ir3_ibo_mapping *mapping = &v->image_mapping; + + for (unsigned i = 0; i < mapping->num_tex; i++) { + unsigned idx = mapping->tex_to_image[i]; + if (idx & IBO_SSBO) { + fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]); + } else { + fd6_emit_image_tex(state, &img->si[idx]); + } + } + } + /* emit texture state: */ OUT_PKT7(ring, opcode, 3); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(tex->num_textures)); + CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures)); OUT_RB(ring, state); /* SRC_ADDR_LO/HI */ OUT_PKT4(ring, tex_const_reg, 2); @@ -441,85 +470,81 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, if (tex_count_reg) { OUT_PKT4(ring, tex_count_reg, 1); - OUT_RING(ring, tex->num_textures); + OUT_RING(ring, num_merged_textures); } return needs_border; } -static void -emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum a6xx_state_block sb, struct fd_shaderbuf_stateobj *so) +/* Emits combined texture state, 
which also includes any Image/SSBO + * related texture state merged in (because we must have all texture + * state for a given stage in a single buffer). In the fast-path, if + * we don't need to merge in any image/ssbo related texture state, we + * just use cached texture stateobj. Otherwise we generate a single- + * use stateobj. + * + * TODO Is there some sane way we can still use cached texture stateobj + * with image/ssbo in use? + * + * returns whether border_color is required: + */ +static bool +fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit, + enum pipe_shader_type type, const struct ir3_shader_variant *v) { - unsigned count = util_last_bit(so->enabled_mask); - unsigned opcode; - - if (count == 0) - return; - - switch (sb) { - case SB6_IBO: - case SB6_CS_IBO: - opcode = CP_LOAD_STATE6_GEOM; - break; - default: - unreachable("bad state block"); - } + struct fd_context *ctx = emit->ctx; + bool needs_border = false; - OUT_PKT7(ring, opcode, 3 + (4 * count)); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(0) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(count)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (unsigned i = 0; i < count; i++) { - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - } + static const struct { + enum a6xx_state_block sb; + enum fd6_state_id state_id; + } s[PIPE_SHADER_TYPES] = { + [PIPE_SHADER_VERTEX] = { SB6_VS_TEX, FD6_GROUP_VS_TEX }, + [PIPE_SHADER_FRAGMENT] = { SB6_FS_TEX, FD6_GROUP_FS_TEX }, + }; -#if 0 - OUT_PKT7(ring, opcode, 3 + (2 * count)); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(1) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(count)); - OUT_RING(ring, 
CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (unsigned i = 0; i < count; i++) { - struct pipe_shader_buffer *buf = &so->sb[i]; - unsigned sz = buf->buffer_size; + debug_assert(s[type].state_id); - /* width is in dwords, overflows into height: */ - sz /= 4; + if (!v->image_mapping.num_tex) { + /* in the fast-path, when we don't have to mix in any image/SSBO + * related texture state, we can just lookup the stateobj and + * re-emit that: + */ + if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) && + ctx->tex[type].num_textures > 0) { + struct fd6_texture_state *tex = fd6_texture_state(ctx, + s[type].sb, &ctx->tex[type]); - OUT_RING(ring, A6XX_SSBO_1_0_WIDTH(sz)); - OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(sz >> 16)); - } -#endif + needs_border |= tex->needs_border; - OUT_PKT7(ring, opcode, 3 + (2 * count)); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(2) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(count)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (unsigned i = 0; i < count; i++) { - struct pipe_shader_buffer *buf = &so->sb[i]; - if (buf->buffer) { - struct fd_resource *rsc = fd_resource(buf->buffer); - OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0); - } else { - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + fd6_emit_add_group(emit, tex->stateobj, s[type].state_id, 0x7); + } + } else { + /* In the slow-path, create a one-shot texture state object + * if either TEX|PROG|SSBO|IMAGE state is dirty: + */ + if (ctx->dirty_shader[type] & + (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | + FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) { + struct fd_texture_stateobj *tex = &ctx->tex[type]; + struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type]; + struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type]; + struct fd_ringbuffer *stateobj = + 
fd_submit_new_ringbuffer(ctx->batch->submit, + 0x1000, FD_RINGBUFFER_STREAMING); + unsigned bcolor_offset = + fd6_border_color_offset(ctx, s[type].sb, tex); + + needs_border |= fd6_emit_textures(ctx->pipe, stateobj, s[type].sb, tex, + bcolor_offset, v, buf, img); + + fd6_emit_add_group(emit, stateobj, s[type].state_id, 0x7); + + fd_ringbuffer_del(stateobj); } } + + return needs_border; } static struct fd_ringbuffer * @@ -906,34 +931,38 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } - if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) && - ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) { - struct fd6_texture_state *tex = fd6_texture_state(ctx, - SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]); - - needs_border |= tex->needs_border; - - fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7); - } + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vp); + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fp); - if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) && - ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) { - struct fd6_texture_state *tex = fd6_texture_state(ctx, - SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]); + if (needs_border) + emit_border_color(ctx, ring); - needs_border |= tex->needs_border; + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & + (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) { + struct fd_ringbuffer *state = + fd6_build_ibo_state(ctx, fp, PIPE_SHADER_FRAGMENT); + struct fd_ringbuffer *obj = fd_submit_new_ringbuffer( + ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING); + const struct ir3_ibo_mapping *mapping = &fp->image_mapping; - fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7); - } + OUT_PKT7(obj, CP_LOAD_STATE6, 3); + OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + 
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) | + CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo)); + OUT_RB(obj, state); - if (needs_border) - emit_border_color(ctx, ring); + OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); + OUT_RB(obj, state); - if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) - emit_ssbos(ctx, ring, SB6_IBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); + OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); + OUT_RING(obj, mapping->num_ibo); - if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) - fd6_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT); + fd6_emit_add_group(emit, obj, FD6_GROUP_IBO, 0x7); + fd_ringbuffer_del(obj); + fd_ringbuffer_del(state); + } if (emit->num_groups > 0) { OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups); @@ -970,7 +999,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_SHADER_TEX) { bool needs_border = false; needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX, - &ctx->tex[PIPE_SHADER_COMPUTE], 0); + &ctx->tex[PIPE_SHADER_COMPUTE], 0, NULL, NULL, NULL); if (needs_border) emit_border_color(ctx, ring); @@ -999,11 +1028,11 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures); #endif - if (dirty & FD_DIRTY_SHADER_SSBO) - emit_ssbos(ctx, ring, SB6_CS_IBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]); - - if (dirty & FD_DIRTY_SHADER_IMAGE) - fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE); +// if (dirty & FD_DIRTY_SHADER_SSBO) +// fd6_emit_ssbos(ctx, ring, PIPE_SHADER_COMPUTE); +// +// if (dirty & FD_DIRTY_SHADER_IMAGE) +// fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 9e57884..ab7ace4 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -53,6 +53,7 @@ enum fd6_state_id { FD6_GROUP_FS_CONST, FD6_GROUP_VS_TEX, FD6_GROUP_FS_TEX, + 
FD6_GROUP_IBO, FD6_GROUP_RASTERIZER, FD6_GROUP_ZSA, }; @@ -173,7 +174,9 @@ fd6_stage2shadersb(gl_shader_stage type) bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, enum a6xx_state_block sb, struct fd_texture_stateobj *tex, - unsigned bcolor_offset); + unsigned bcolor_offset, + const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf, + struct fd_shaderimg_stateobj *img); void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index f4e3492..153c004 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -32,16 +32,6 @@ #include "fd6_format.h" #include "fd6_texture.h" -static enum a6xx_state_block texsb[] = { - [PIPE_SHADER_COMPUTE] = SB6_CS_TEX, - [PIPE_SHADER_FRAGMENT] = SB6_FS_TEX, -}; - -static enum a6xx_state_block imgsb[] = { - [PIPE_SHADER_COMPUTE] = SB6_CS_IBO, - [PIPE_SHADER_FRAGMENT] = SB6_IBO, -}; - struct fd6_image { struct pipe_resource *prsc; enum pipe_format pfmt; @@ -57,16 +47,16 @@ struct fd6_image { uint32_t array_pitch; struct fd_bo *bo; uint32_t offset; + bool buffer; }; -static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg) +static void translate_image(struct fd6_image *img, const struct pipe_image_view *pimg) { enum pipe_format format = pimg->format; struct pipe_resource *prsc = pimg->resource; struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl; - if (!pimg->resource) { + if (!prsc) { memset(img, 0, sizeof(*img)); return; } @@ -81,45 +71,76 @@ static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg) img->bo = rsc->bo; if (prsc->target == PIPE_BUFFER) { - lvl = 0; + img->buffer = true; img->offset = pimg->u.buf.offset; - img->pitch = pimg->u.buf.size; + img->pitch = 0; img->array_pitch = 0; + + /* size is encoded with low 15b in WIDTH and high bits in + * HEIGHT, in 
units of elements: + */ + unsigned sz = prsc->width0; + img->width = sz & MASK(15); + img->height = sz >> 15; + img->depth = 0; } else { - lvl = pimg->u.tex.level; + img->buffer = false; + unsigned lvl = pimg->u.tex.level; img->offset = rsc->slices[lvl].offset; img->pitch = rsc->slices[lvl].pitch * rsc->cpp; img->array_pitch = rsc->layer_size; - } - img->width = u_minify(prsc->width0, lvl); - img->height = u_minify(prsc->height0, lvl); - img->depth = u_minify(prsc->depth0, lvl); + img->width = u_minify(prsc->width0, lvl); + img->height = u_minify(prsc->height0, lvl); + img->depth = u_minify(prsc->depth0, lvl); + } } -static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, - struct fd6_image *img, enum pipe_shader_type shader) +static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg) { - unsigned opcode = CP_LOAD_STATE6_FRAG; + enum pipe_format format = PIPE_FORMAT_R32_UINT; + struct pipe_resource *prsc = pimg->buffer; + struct fd_resource *rsc = fd_resource(prsc); - assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); + if (!prsc) { + memset(img, 0, sizeof(*img)); + return; + } + + img->prsc = prsc; + img->pfmt = format; + img->fmt = fd6_pipe2tex(format); + img->fetchsize = fd6_pipe2fetchsize(format); + img->type = fd6_tex_type(prsc->target); + img->srgb = util_format_is_srgb(format); + img->cpp = rsc->cpp; + img->bo = rsc->bo; + img->buffer = true; - OUT_PKT7(ring, opcode, 3 + 12); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + img->offset = pimg->buffer_offset; + img->pitch = 0; + img->array_pitch = 0; + /* size is encoded with low 15b in WIDTH and high bits in HEIGHT, + * in units of elements: + */ + unsigned sz = 
pimg->buffer_size / 4; + img->width = sz & MASK(15); + img->height = sz >> 15; + img->depth = 0; +} + +static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img) +{ OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) | + A6XX_TEX_CONST_0_TILE_MODE(fd_resource(img->prsc)->tile_mode) | fd6_tex_swiz(img->prsc, img->fmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | COND(img->srgb, A6XX_TEX_CONST_0_SRGB)); OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) | A6XX_TEX_CONST_1_HEIGHT(img->height)); OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) | + COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) | A6XX_TEX_CONST_2_TYPE(img->type) | A6XX_TEX_CONST_2_PITCH(img->pitch)); OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch)); @@ -136,101 +157,85 @@ static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); } -static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot, - struct fd6_image *img, enum pipe_shader_type shader) +void +fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg) { - unsigned opcode = CP_LOAD_STATE6_FRAG; + struct fd6_image img; + translate_image(&img, pimg); + emit_image_tex(ring, &img); +} - assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); +void +fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf) +{ + struct fd6_image img; + translate_buf(&img, pbuf); + emit_image_tex(ring, &img); +} -#if 0 - OUT_PKT7(ring, opcode, 3 + 4); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | - CP_LOAD_STATE6_0_STATE_TYPE(0) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0)); - OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch)); - OUT_RING(ring, A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch)); - OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp)); -#endif - -#if 0 - OUT_PKT7(ring, opcode, 3 + 2); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | - CP_LOAD_STATE6_0_STATE_TYPE(1) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) | - A6XX_SSBO_1_0_WIDTH(img->width)); - OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) | - A6XX_SSBO_1_1_DEPTH(img->depth)); -#endif - - OUT_PKT7(ring, opcode, 3 + 2); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | - CP_LOAD_STATE6_0_STATE_TYPE(2) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); +static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img) +{ + debug_assert(fd_resource(img->prsc)->tile_mode == 0); + + OUT_RING(ring, A6XX_IBO_0_FMT(img->fmt)); + OUT_RING(ring, A6XX_IBO_1_WIDTH(img->width) | + A6XX_IBO_1_HEIGHT(img->height)); + OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) | + COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) | + A6XX_IBO_2_TYPE(img->type)); + OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch)); if (img->bo) { - OUT_RELOCW(ring, img->bo, img->offset, 0, 0); + OUT_RELOCW(ring, img->bo, img->offset, + (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0); } else { OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth)); } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + 
OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); } -/* Note that to avoid conflicts with textures and non-image "SSBO"s, images - * are placedd, in reverse order, at the end of the state block, so for - * example the sampler state: - * - * 0: first texture - * 1: second texture - * .... - * N-1: second image - * N: first image - */ -static unsigned -get_image_slot(unsigned index) -{ - /* TODO figure out real limit per generation, and don't hardcode. - * This needs to match get_image_slot() in ir3_compiler_nir. - * Possibly should be factored out into shared helper? - */ - const unsigned max_samplers = 16; - return max_samplers - index - 1; -} - -/* Emit required "SSBO" and sampler state. The sampler state is used by the - * hw for imageLoad(), and "SSBO" state for imageStore(). Returns max sampler - * used. - */ -void -fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, +/* Build combined image/SSBO "IBO" state, returns ownership of state reference */ +struct fd_ringbuffer * +fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, enum pipe_shader_type shader) { - struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader]; - unsigned enabled_mask = so->enabled_mask; + struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; + struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; + const struct ir3_ibo_mapping *mapping = &v->image_mapping; + + struct fd_ringbuffer *state = + fd_submit_new_ringbuffer(ctx->batch->submit, + mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING); - while (enabled_mask) { - unsigned index = u_bit_scan(&enabled_mask); - unsigned slot = get_image_slot(index); + assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); + + for (unsigned i = 0; i < mapping->num_ibo; i++) { struct fd6_image img; + unsigned idx = 
mapping->ibo_to_image[i]; - translate_image(&img, &so->si[index]); + if (idx & IBO_SSBO) { + translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]); + } else { + translate_image(&img, &imgso->si[idx]); + } - emit_image_tex(ring, slot, &img, shader); - emit_image_ssbo(ring, slot, &img, shader); + emit_image_ssbo(state, &img); } + + return state; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h index 0ee5393..a2dbfd3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h @@ -30,7 +30,11 @@ #include "freedreno_context.h" -void fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum pipe_shader_type shader); +void fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg); +void fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf); + +struct ir3_shader_variant; +struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx, + const struct ir3_shader_variant *v, enum pipe_shader_type shader); #endif /* FD6_IMAGE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 9180154..a9d8384 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -349,6 +349,7 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2); OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) | + A6XX_SP_VS_CONFIG_NIBO(s[VS].v->image_mapping.num_ibo) | A6XX_SP_VS_CONFIG_NTEX(s[VS].v->num_samp) | A6XX_SP_VS_CONFIG_NSAMP(s[VS].v->num_samp)); /* SP_VS_CONFIG */ OUT_RING(ring, s[VS].instrlen); /* SP_VS_INSTRLEN */ @@ -382,6 +383,7 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2); OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) | + A6XX_SP_FS_CONFIG_NIBO(s[FS].v->image_mapping.num_ibo) | 
A6XX_SP_FS_CONFIG_NTEX(s[FS].v->num_samp) | A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp)); /* SP_FS_CONFIG */ OUT_RING(ring, s[FS].instrlen); /* SP_FS_INSTRLEN */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c index f4bad03..171a016 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -448,7 +448,8 @@ fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb, state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->needs_border = needs_border; - fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset); + fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset, + NULL, NULL, NULL); /* NOTE: uses copy of key in state obj, because pointer passed by caller * is probably on the stack -- 2.7.4