From 6dc8afc19b5286311d66d2523295ace371d561f2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 Mar 2023 13:45:34 -0800 Subject: [PATCH] freedreno/a6xx+: Use template to handle a6xx vs a7xx differences This doesn't enable support for a7xx yet, but uses the new register pack builders for registers that differ between a7xx and a6xx. Signed-off-by: Rob Clark Part-of: --- src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc | 192 +++++++++++++-------- src/gallium/drivers/freedreno/a6xx/fd6_blitter.h | 6 +- src/gallium/drivers/freedreno/a6xx/fd6_compute.cc | 75 +++++--- src/gallium/drivers/freedreno/a6xx/fd6_compute.h | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_context.cc | 15 +- src/gallium/drivers/freedreno/a6xx/fd6_context.h | 5 +- src/gallium/drivers/freedreno/a6xx/fd6_draw.cc | 53 +++--- src/gallium/drivers/freedreno/a6xx/fd6_draw.h | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_emit.cc | 36 ++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 8 +- src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc | 144 +++++++++------- src/gallium/drivers/freedreno/a6xx/fd6_gmem.h | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_image.cc | 12 +- src/gallium/drivers/freedreno/a6xx/fd6_image.h | 5 +- src/gallium/drivers/freedreno/a6xx/fd6_program.cc | 145 ++++++++++------ src/gallium/drivers/freedreno/a6xx/fd6_program.h | 5 +- .../drivers/freedreno/a6xx/fd6_rasterizer.cc | 6 +- .../drivers/freedreno/a6xx/fd6_rasterizer.h | 8 +- src/gallium/drivers/freedreno/a6xx/fd6_screen.cc | 6 +- src/gallium/drivers/freedreno/a6xx/fd6_screen.h | 2 +- 20 files changed, 444 insertions(+), 285 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc index 71ce345..7c1775f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc @@ -262,6 +262,7 @@ emit_setup(struct fd_batch *batch) fd6_emit_ccu_cntl(ring, screen, false); } +template static void emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt, bool scissor_enable, union pipe_color_union *color, @@ -296,14 +297,14 @@ emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt, * controlling the internal/accumulator format or something like * that. It's certainly not tied to only the src format. */ - OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1); - OUT_RING( - ring, - A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) | - COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) | - COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) | - COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) | - A6XX_SP_2D_DST_FORMAT_MASK(0xf)); + OUT_REG(ring, SP_2D_DST_FORMAT( + CHIP, + .sint = util_format_is_pure_sint(pfmt), + .uint = util_format_is_pure_uint(pfmt), + .color_format = fmt, + .srgb = is_srgb, + .mask = 0xf, + )); OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); OUT_RING(ring, unknown_8c01); @@ -330,6 +331,7 @@ emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst, /* buffers need to be handled specially since x/width can exceed the bounds * supported by hw.. if necessary decompose into (potentially) two 2D blits */ +template static void emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_blit_info *info) @@ -379,7 +381,7 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, sshift = sbox->x & 0x3f; dshift = dbox->x & 0x3f; - emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0); + emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0); for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) { unsigned soff, doff, w, p; @@ -397,22 +399,26 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, * Emit source: */ OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO( + SP_PS_2D_SRC_INFO( + CHIP, .color_format = FMT6_8_UNORM, .tile_mode = TILE6_LINEAR, .color_swap = WZYX, .unk20 = true, .unk22 = true, ), - A6XX_SP_PS_2D_SRC_SIZE( + SP_PS_2D_SRC_SIZE( + CHIP, .width = sshift + w, .height = 1, ), - A6XX_SP_PS_2D_SRC( + SP_PS_2D_SRC( + CHIP, .bo = src->bo, .bo_offset = soff, ), - A6XX_SP_PS_2D_SRC_PITCH( + SP_PS_2D_SRC_PITCH( + CHIP, .pitch = p, ), ); @@ -454,19 +460,20 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring, } } +template static void fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt { struct fd_ringbuffer *ring = fd_batch_get_prologue(batch); union pipe_color_union color = {}; - emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0); + emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0); OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO(), - A6XX_SP_PS_2D_SRC_SIZE(), - A6XX_SP_PS_2D_SRC(), - A6XX_SP_PS_2D_SRC_PITCH(), + SP_PS_2D_SRC_INFO(CHIP), + SP_PS_2D_SRC_SIZE(CHIP), + SP_PS_2D_SRC(CHIP), + SP_PS_2D_SRC_PITCH(CHIP), ); OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); @@ -583,6 +590,7 @@ emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, } } +template static void emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, unsigned layer, unsigned nr_samples, bool sample_0) @@ -605,7 +613,8 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, sfmt = FMT6_A8_UNORM; OUT_REG(ring, - A6XX_SP_PS_2D_SRC_INFO( + SP_PS_2D_SRC_INFO( + CHIP, .color_format = sfmt, .tile_mode = stile, .color_swap = sswap, @@ -617,28 +626,36 @@ emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info, .unk20 = true, .unk22 = true, ), - A6XX_SP_PS_2D_SRC_SIZE( + SP_PS_2D_SRC_SIZE( + CHIP, .width = width, .height = height, ), - A6XX_SP_PS_2D_SRC( + SP_PS_2D_SRC( + CHIP, .bo = src->bo, .bo_offset = soff, ), - A6XX_SP_PS_2D_SRC_PITCH( + SP_PS_2D_SRC_PITCH( + CHIP, .pitch = pitch, ), ); - if (subwc_enabled) { - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6); - fd6_emit_flag_reference(ring, src, info->src.level, layer); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) { + OUT_REG(ring, + SP_PS_2D_SRC_FLAGS( + CHIP, + .bo = src->bo, + .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer), + ), + SP_PS_2D_SRC_FLAGS_PITCH( + CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)), + ); } } +template static void emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct pipe_blit_info *info, bool sample_0) @@ -699,11 +716,11 @@ emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1)); } - emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate); + emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate); for (unsigned i = 0; i < info->dst.box.depth; i++) { - emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0); + emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0); emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level, dbox->z + i); @@ -809,6 +826,7 @@ convert_color(enum pipe_format format, union pipe_color_union *pcolor) return color; } +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, struct pipe_surface *psurf, const struct pipe_box *box2d, @@ -830,7 +848,7 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, union pipe_color_union clear_color = convert_color(psurf->format, color); emit_clear_color(ring, psurf->format, &clear_color); - emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0); + emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0); for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer; i++) { @@ -856,6 +874,14 @@ fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, } } +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct pipe_surface *psurf, const struct pipe_box *box2d, + union pipe_color_union *color, uint32_t unknown_8c01); +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct pipe_surface *psurf, const struct pipe_box *box2d, + union pipe_color_union *color, uint32_t unknown_8c01); + +template static void fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned level, const struct pipe_box *box, const void *data) @@ -890,7 +916,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, util_format_unpack_s_8uint(prsc->format, &stencil, data, 1); if (rsc->stencil) - fd6_clear_texture(pctx, &rsc->stencil->b.b, level, box, &stencil); + fd6_clear_texture(pctx, &rsc->stencil->b.b, level, box, &stencil); color.f[0] = depth; color.ui[1] = stencil; @@ -928,7 +954,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, }, }; - fd6_clear_surface(ctx, batch->draw, &surf, box, &color, 0); + fd6_clear_surface(ctx, batch->draw, &surf, box, &color, 0); fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true); fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true); @@ -945,6 +971,7 @@ fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc, fd_context_dirty(ctx, FD_DIRTY_QUERY); } +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) @@ -969,7 +996,7 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, /* Enable scissor bit, which will take into account the window scissor * which is set per-tile */ - emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0); + emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0); /* We shouldn't be using GMEM in the layered rendering case: */ assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); @@ -980,24 +1007,32 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR); enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples); - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10); - OUT_RING(ring, - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) | - A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) | - COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) | - COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) | - A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22); - OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height)); - OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */ - OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */ - OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch)); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); + OUT_REG(ring, + SP_PS_2D_SRC_INFO( + CHIP, + .color_format = sfmt, + .tile_mode = TILE6_2, + .color_swap = WZYX, + .srgb = util_format_is_srgb(psurf->format), + .samples = samples, + .samples_average = samples > MSAA_ONE, + .unk20 = true, + .unk22 = true, + ), + SP_PS_2D_SRC_SIZE( + CHIP, + .width = psurf->width, + .height = psurf->height, + ), + SP_PS_2D_SRC( + CHIP, + .qword = gmem_base, + ), + SP_PS_2D_SRC_PITCH( + CHIP, + .pitch = gmem_pitch, + ), + ); /* sync GMEM writes with CACHE. */ fd6_cache_inv(batch, ring); @@ -1018,6 +1053,12 @@ fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, fd_wfi(batch, ring); } +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01); +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01); + +template static bool handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info, bool sample_0) assert_dt @@ -1065,12 +1106,12 @@ handle_rgba_blit(struct fd_context *ctx, (info->dst.resource->target == PIPE_BUFFER)) { assert(src->layout.tile_mode == TILE6_LINEAR); assert(dst->layout.tile_mode == TILE6_LINEAR); - emit_blit_buffer(ctx, batch->draw, info); + emit_blit_buffer(ctx, batch->draw, info); } else { /* I don't *think* we need to handle blits between buffer <-> !buffer */ assert(info->src.resource->target != PIPE_BUFFER); assert(info->dst.resource->target != PIPE_BUFFER); - emit_blit_texture(ctx, batch->draw, info, sample_0); + emit_blit_texture(ctx, batch->draw, info, sample_0); } trace_end_blit(&batch->trace, batch->draw); @@ -1098,11 +1139,12 @@ handle_rgba_blit(struct fd_context *ctx, * in particular as u_blitter cannot blit stencil. So handle the fallback * ourself and never "fail". */ +template static bool do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info, bool sample_0) assert_dt { - bool success = handle_rgba_blit(ctx, info, sample_0); + bool success = handle_rgba_blit(ctx, info, sample_0); if (!success) { if (sample_0 && !util_format_is_pure_integer(info->src.format)) mesa_logw("sample averaging on fallback blit when we shouldn't."); @@ -1116,6 +1158,7 @@ do_rewritten_blit(struct fd_context *ctx, * Handle depth/stencil blits either via u_blitter and/or re-writing the * blit into an equivilant format that we can handle */ +template static bool handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt @@ -1139,14 +1182,14 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R8_UINT; blit.dst.format = PIPE_FORMAT_R8_UINT; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (info->mask & PIPE_MASK_Z) { blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R32_FLOAT; blit.dst.format = PIPE_FORMAT_R32_FLOAT; - do_rewritten_blit(ctx, &blit, true); + do_rewritten_blit(ctx, &blit, true); } if (info->mask & PIPE_MASK_S) { @@ -1155,7 +1198,7 @@ handle_zs_blit(struct fd_context *ctx, blit.dst.format = PIPE_FORMAT_R8_UINT; blit.src.resource = &src->stencil->b.b; blit.dst.resource = &dst->stencil->b.b; - do_rewritten_blit(ctx, &blit, true); + do_rewritten_blit(ctx, &blit, true); } return true; @@ -1164,7 +1207,7 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R16_UNORM; blit.dst.format = PIPE_FORMAT_R16_UNORM; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z32_FLOAT: @@ -1172,7 +1215,7 @@ handle_zs_blit(struct fd_context *ctx, blit.mask = PIPE_MASK_R; blit.src.format = PIPE_FORMAT_R32_UINT; blit.dst.format = PIPE_FORMAT_R32_UINT; - return do_rewritten_blit(ctx, &blit, true); + return do_rewritten_blit(ctx, &blit, true); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: @@ -1206,6 +1249,7 @@ handle_zs_blit(struct fd_context *ctx, } } +template static bool handle_compressed_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt @@ -1251,7 +1295,7 @@ handle_compressed_blit(struct fd_context *ctx, blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw); blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh); - return do_rewritten_blit(ctx, &blit, false); + return do_rewritten_blit(ctx, &blit, false); } /** @@ -1260,6 +1304,7 @@ handle_compressed_blit(struct fd_context *ctx, * (also -1.0), when we're supposed to be memcpying the bits. See * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion. */ +template static bool handle_snorm_copy_blit(struct fd_context *ctx, const struct pipe_blit_info *info) @@ -1273,41 +1318,48 @@ handle_snorm_copy_blit(struct fd_context *ctx, blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format); - return do_rewritten_blit(ctx, &blit, false); + return do_rewritten_blit(ctx, &blit, false); } +template static bool fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt { if (info->mask & PIPE_MASK_ZS) - return handle_zs_blit(ctx, info); + return handle_zs_blit(ctx, info); if (util_format_is_compressed(info->src.format) || util_format_is_compressed(info->dst.format)) - return handle_compressed_blit(ctx, info); + return handle_compressed_blit(ctx, info); if ((info->src.format == info->dst.format) && util_format_is_snorm(info->src.format)) - return handle_snorm_copy_blit(ctx, info); + return handle_snorm_copy_blit(ctx, info); - return handle_rgba_blit(ctx, info, false); + return handle_rgba_blit(ctx, info, false); } +template void -fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_blitter_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->clear_ubwc = fd6_clear_ubwc; + ctx->clear_ubwc = fd6_clear_ubwc; ctx->validate_format = fd6_validate_format; if (FD_DBG(NOBLIT)) return; - pctx->clear_texture = fd6_clear_texture; - ctx->blit = fd6_blit; + pctx->clear_texture = fd6_clear_texture; + ctx->blit = fd6_blit; } +/* Teach the compiler about needed variants: */ +template void fd6_blitter_init(struct pipe_context *pctx); +template void fd6_blitter_init(struct pipe_context *pctx); + unsigned fd6_tile_mode(const struct pipe_resource *tmpl) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h index 8687f57..d7b8a25 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.h @@ -32,8 +32,8 @@ #include "freedreno_context.h" -BEGINC; +template void fd6_blitter_init(struct pipe_context *pctx); unsigned fd6_tile_mode(const struct pipe_resource *tmpl); @@ -42,12 +42,12 @@ unsigned fd6_tile_mode(const struct pipe_resource *tmpl); * instead of CP_EVENT_WRITE::BLITs */ +template void fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring, struct pipe_surface *psurf, const struct pipe_box *box2d, union pipe_color_union *color, uint32_t unknown_8c01) assert_dt; +template void fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01) assert_dt; -ENDC; - #endif /* FD6_BLIT_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index 0306e70..9802964 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -41,6 +41,7 @@ #include "fd6_pack.h" /* maybe move to fd6_program? */ +template static void cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *v) @@ -49,14 +50,16 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_info *i = &v->info; enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64; - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, )); - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) | - A6XX_HLSQ_CS_CNTL_ENABLED); + OUT_REG(ring, HLSQ_CS_CNTL( + CHIP, + .constlen = v->constlen, + .enabled = true, + )); OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 1); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | @@ -103,6 +106,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, fd6_emit_immediates(ctx->screen, v, ring); } +template static void fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt { @@ -119,7 +123,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt return; cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); - cs_program_emit(ctx, cs->stateobj, cs->v); + cs_program_emit(ctx, cs->stateobj, cs->v); cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v); } @@ -156,7 +160,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt } if (ctx->gen_dirty) - fd6_emit_cs_state(ctx, ring, cs); + fd6_emit_cs_state(ctx, ring, cs); if (ctx->gen_dirty & BIT(FD6_GROUP_CONST)) fd6_emit_cs_user_consts(ctx, ring, cs); @@ -201,25 +205,37 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt const unsigned *num_groups = info->grid; /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */ const unsigned work_dim = info->work_dim ? info->work_dim : 3; - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7); - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3); - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ + OUT_REG(ring, + HLSQ_CS_NDRANGE_0( + CHIP, + .kerneldim = work_dim, + .localsizex = local_size[0] - 1, + .localsizey = local_size[1] - 1, + .localsizez = local_size[2] - 1, + ), + HLSQ_CS_NDRANGE_1( + CHIP, + .globalsize_x = local_size[0] * num_groups[0], + ), + HLSQ_CS_NDRANGE_2(CHIP, .globaloff_x = 0), + HLSQ_CS_NDRANGE_3( + CHIP, + .globalsize_y = local_size[1] * num_groups[1], + ), + HLSQ_CS_NDRANGE_4(CHIP, .globaloff_y = 0), + HLSQ_CS_NDRANGE_5( + CHIP, + .globalsize_z = local_size[2] * num_groups[2], + ), + HLSQ_CS_NDRANGE_6(CHIP, .globaloff_z = 0), + ); + + OUT_REG(ring, + HLSQ_CS_KERNEL_GROUP_X(CHIP, 1), + HLSQ_CS_KERNEL_GROUP_Y(CHIP, 1), + HLSQ_CS_KERNEL_GROUP_Z(CHIP, 1), + ); if (info->indirect) { struct fd_resource *rsc = fd_resource(info->indirect); @@ -264,11 +280,18 @@ fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso) free(hwcso); } +template void -fd6_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_compute_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->launch_grid = fd6_launch_grid; + + ctx->launch_grid = fd6_launch_grid; pctx->create_compute_state = fd6_compute_state_create; pctx->delete_compute_state = fd6_compute_state_delete; } + +/* Teach the compiler about needed variants: */ +template void fd6_compute_init(struct pipe_context *pctx); +template void fd6_compute_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.h b/src/gallium/drivers/freedreno/a6xx/fd6_compute.h index f832790..3836589b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.h @@ -36,7 +36,7 @@ struct fd6_compute_state { uint32_t user_consts_cmdstream_size; }; -EXTERNC +template void fd6_compute_init(struct pipe_context *pctx); #endif /* FD6_COMPUTE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index 1d4281e..0493b6f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -228,6 +228,7 @@ setup_state_map(struct fd_context *ctx) BIT(FD6_GROUP_NON_GROUP)); } +template struct pipe_context * fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) disable_thread_safety_analysis @@ -253,11 +254,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create; pctx->create_vertex_elements_state = fd6_vertex_state_create; - fd6_draw_init(pctx); - fd6_compute_init(pctx); - fd6_gmem_init(pctx); + fd6_draw_init(pctx); + fd6_compute_init(pctx); + fd6_gmem_init(pctx); fd6_texture_init(pctx); - fd6_prog_init(pctx); + fd6_prog_init(pctx); fd6_query_context_init(pctx); setup_state_map(&fd6_ctx->base); @@ -297,7 +298,11 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd_context_setup_common_vbos(&fd6_ctx->base); - fd6_blitter_init(pctx); + fd6_blitter_init(pctx); return fd_context_init_tc(pctx, flags); } + +/* Teach the compiler about needed variants: */ +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 56a40a2..7025b22 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -38,8 +38,6 @@ #include "a6xx.xml.h" -BEGINC; - struct fd6_lrz_state { union { struct { @@ -154,6 +152,7 @@ fd6_context(struct fd_context *ctx) return (struct fd6_context *)ctx; } +template struct pipe_context *fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); @@ -197,6 +196,4 @@ fd6_vertex_stateobj(void *p) return (struct fd6_vertex_stateobj *)p; } -ENDC; - #endif /* FD6_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc index 1e778ab..425a6f7 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc @@ -202,6 +202,7 @@ flush_streamout(struct fd_context *ctx, struct fd6_emit *emit) } } +template static void fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, unsigned drawid_offset, @@ -344,7 +345,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, } if (emit.dirty_groups) - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); if (ctx->batch->barrier) fd6_barrier_flush(ctx->batch); @@ -398,7 +399,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, if (emit.dirty_groups) { emit.state.num_groups = 0; emit.draw = &draws[i]; - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); } assert(!index_offset); /* handled by util_draw_multi() */ @@ -418,6 +419,7 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, fd_context_all_clean(ctx); } +template static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) assert_dt { @@ -436,7 +438,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a fd6_emit_ccu_cntl(ring, screen, false); OUT_REG(ring, - A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, @@ -451,23 +453,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) a OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); OUT_RING(ring, 0x0); - OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - - OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1); - OUT_RING(ring, 0x0000f410); + OUT_REG(ring, + SP_PS_2D_SRC_INFO(CHIP), + SP_PS_2D_SRC_SIZE(CHIP), + SP_PS_2D_SRC(CHIP), + SP_PS_2D_SRC_PITCH(CHIP), + ); + + OUT_REG(ring, SP_2D_DST_FORMAT( + CHIP, + // TODO probably FMT6_16_UNORM, but this matches what we used to emit: + .color_format = FMT6_32_32_32_32_FLOAT, + .mask = 0xf, + )); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); OUT_RING(ring, @@ -545,6 +543,7 @@ is_z32(enum pipe_format format) } } +template static bool fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, const union pipe_color_union *color, double depth, @@ -566,7 +565,7 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { zsbuf->lrz_valid = true; zsbuf->lrz_direction = FD_LRZ_UNKNOWN; - fd6_clear_lrz(ctx->batch, zsbuf, depth); + fd6_clear_lrz(ctx->batch, zsbuf, depth); } } @@ -586,10 +585,16 @@ fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers, return true; } +template void -fd6_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_draw_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->draw_vbos = fd6_draw_vbos; - ctx->clear = fd6_clear; + ctx->clear = fd6_clear; + ctx->draw_vbos = fd6_draw_vbos; } + +/* Teach the compiler about needed variants: */ +template void fd6_draw_init(struct pipe_context *pctx); +template void fd6_draw_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.h b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h index fad2501..00e9eed 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h @@ -34,7 +34,7 @@ #include "fd6_context.h" -EXTERNC +template void fd6_draw_init(struct pipe_context *pctx); #endif /* FD6_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index c7d7ab9..a172fc3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -535,6 +535,7 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem) return ring; } +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) { @@ -596,7 +597,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) FD6_GROUP_PROG_INTERP); break; case FD6_GROUP_RASTERIZER: - state = fd6_rasterizer_state(ctx, emit->primitive_restart); + state = fd6_rasterizer_state(ctx, emit->primitive_restart); fd6_state_add_group(&emit->state, state, FD6_GROUP_RASTERIZER); break; case FD6_GROUP_PROG_FB_RAST: @@ -613,23 +614,23 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR); break; case FD6_GROUP_VS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS); break; case FD6_GROUP_HS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_HS_BINDLESS); break; case FD6_GROUP_DS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_DS_BINDLESS); break; case FD6_GROUP_GS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_GS_BINDLESS); break; case FD6_GROUP_FS_BINDLESS: - state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read); + state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read); fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS); break; case FD6_GROUP_CONST: @@ -686,6 +687,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_emit(&emit->state, ring); } +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); + +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs) @@ -722,7 +727,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, case FD6_GROUP_CS_BINDLESS: fd6_state_take_group( &state, - fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false), + fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false), FD6_GROUP_CS_BINDLESS); break; default: @@ -749,9 +754,13 @@ fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gme )); } +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs); +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs); + /* emit setup at begin of new cmdstream buffer (don't rely on previous * state, there could have been a context switch between ioctls): */ +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) { @@ -767,7 +776,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) fd6_cache_inv(batch, ring); OUT_REG(ring, - A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, @@ -820,7 +829,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value); - WRITE(REG_A6XX_PC_RASTER_CNTL, 0); + OUT_REG(ring, PC_RASTER_CNTL(CHIP)); WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0); @@ -844,7 +853,11 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0); - WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); + OUT_REG(ring, HLSQ_CONTROL_5_REG( + CHIP, + .linelengthregid = INVALID_REG, + .foveationqualityregid = INVALID_REG, + )); emit_marker6(ring, 7); @@ -904,6 +917,9 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) } } +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + static void fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst, unsigned dst_off, struct pipe_resource *src, unsigned src_off, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 16e6b48c..6757237 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -332,22 +332,22 @@ fd6_gl2spacing(enum gl_tess_spacing spacing) } } -BEGINC; - +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt; struct fd6_compute_state; +template void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd6_compute_state *cs) assert_dt; void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem); + +template void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); void fd6_emit_init_screen(struct pipe_screen *pscreen); -ENDC; - static inline void fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc index 70fee1b..c10f09b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc @@ -73,6 +73,7 @@ fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc, } } +template static void emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, const struct fd_gmem_stateobj *gmem) @@ -127,7 +128,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, OUT_REG( ring, - A6XX_RB_MRT_BUF_INFO(i, .color_format = format, + RB_MRT_BUF_INFO(CHIP, i, .color_format = format, .color_tile_mode = tile_mode, .color_swap = swap), A6XX_RB_MRT_PITCH(i, stride), A6XX_RB_MRT_ARRAY_PITCH(i, array_stride), @@ -155,6 +156,7 @@ emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb, OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index)); } +template static void emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, const struct fd_gmem_stateobj *gmem) @@ -169,7 +171,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer); OUT_REG( - ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt), + ring, RB_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt), A6XX_RB_DEPTH_BUFFER_PITCH(stride), A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(array_stride), A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset), @@ -209,22 +211,25 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, uint32_t offset = fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer); - OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true), + OUT_REG(ring, RB_STENCIL_INFO(CHIP, .separate_stencil = true), A6XX_RB_STENCIL_BUFFER_PITCH(stride), A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(array_stride), A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo, .bo_offset = offset), A6XX_RB_STENCIL_BUFFER_BASE_GMEM(base)); } else { - OUT_REG(ring, A6XX_RB_STENCIL_INFO(0)); + OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0)); } } else { - OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); - OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ - OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */ + OUT_REG(ring, + RB_DEPTH_BUFFER_INFO( + CHIP, + .depth_format = DEPTH6_NONE, + ), + A6XX_RB_DEPTH_BUFFER_PITCH(), + A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(), + A6XX_RB_DEPTH_BUFFER_BASE(), + A6XX_RB_DEPTH_BUFFER_BASE_GMEM(), + ); OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE)); @@ -236,7 +241,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */ OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */ - OUT_REG(ring, A6XX_RB_STENCIL_INFO(0)); + OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0)); } } @@ -359,13 +364,13 @@ patch_fb_read_sysmem(struct fd_batch *batch) util_dynarray_clear(&batch->fb_read_patches); } +template static void update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, bool binning) { struct fd_ringbuffer *ring = batch->gmem; struct fd_screen *screen = batch->ctx->screen; - uint32_t cntl = 0; bool depth_ubwc_enable = false; uint32_t mrts_ubwc_enable = 0; int i; @@ -387,20 +392,23 @@ update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb, mrts_ubwc_enable |= 1 << i; } - cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2); - if (binning) - cntl |= A6XX_RB_RENDER_CNTL_BINNING; + struct fd_reg_pair rb_render_cntl = RB_RENDER_CNTL( + CHIP, + .ccusinglecachelinesize = 2, + .binning = binning, + .flag_depth = depth_ubwc_enable, + .flag_mrts = mrts_ubwc_enable, + ); if (screen->info->a6xx.has_cp_reg_write) { - OUT_PKT7(ring, CP_REG_WRITE, 3); - OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL)); - OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL); + OUT_PKT(ring, CP_REG_WRITE, + CP_REG_WRITE_0(TRACK_RENDER_CNTL), + CP_REG_WRITE_1(rb_render_cntl.reg), + CP_REG_WRITE_2(rb_render_cntl.value), + ); } else { - OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1); + OUT_REG(ring, rb_render_cntl); } - OUT_RING(ring, cntl | - COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) | - A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable)); } /* extra size to store VSC_DRAW_STRM_SIZE: */ @@ -693,12 +701,14 @@ struct bin_size_params { unsigned lrz_feedback_zmode_mask; }; +template static void set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem, struct bin_size_params p) { unsigned w = gmem ? gmem->bin_w : 0; unsigned h = gmem ? gmem->bin_h : 0; + OUT_REG(ring, A6XX_GRAS_BIN_CONTROL( .binw = w, .binh = h, .render_mode = p.render_mode, @@ -706,7 +716,8 @@ set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem, .buffers_location = p.buffers_location, .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, )); - OUT_REG(ring, A6XX_RB_BIN_CONTROL( + OUT_REG(ring, RB_BIN_CONTROL( + CHIP, .binw = w, .binh = h, .render_mode = p.render_mode, .force_lrz_write_dis = p.force_lrz_write_dis, @@ -827,9 +838,11 @@ emit_msaa(struct fd_ringbuffer *ring, unsigned nr) } static void prepare_tile_setup_ib(struct fd_batch *batch); +template static void prepare_tile_fini_ib(struct fd_batch *batch); /* before first tile */ +template static void fd6_emit_tile_init(struct fd_batch *batch) assert_dt { @@ -838,7 +851,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct fd_screen *screen = batch->ctx->screen; - fd6_emit_restore(batch, ring); + fd6_emit_restore(batch, ring); fd6_emit_lrz_flush(ring); @@ -851,7 +864,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt fd6_cache_inv(batch, ring); prepare_tile_setup_ib(batch); - prepare_tile_fini_ib(batch); + prepare_tile_fini_ib(batch); OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); @@ -863,8 +876,8 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt fd_wfi(batch, ring); fd6_emit_ccu_cntl(ring, screen, true); - emit_zs(ring, pfb->zsbuf, batch->gmem_state); - emit_mrt(ring, pfb, batch->gmem_state); + emit_zs(ring, pfb->zsbuf, batch->gmem_state); + emit_mrt(ring, pfb, batch->gmem_state); emit_msaa(ring, pfb->samples); patch_fb_read_gmem(batch); @@ -872,12 +885,12 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt /* enable stream-out during binning pass: */ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = BINNING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, }); - update_render_cntl(batch, pfb, true); + update_render_cntl(batch, pfb, true); emit_binning_pass(batch); /* and disable stream-out for draw pass: */ @@ -890,7 +903,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt */ // NOTE a618 not setting .FORCE_LRZ_WRITE_DIS .. - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .force_lrz_write_dis = true, .buffers_location = BUFFERS_IN_GMEM, @@ -912,18 +925,19 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt /* no binning pass, so enable stream-out for draw pass:: */ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, }); } - update_render_cntl(batch, pfb, false); + update_render_cntl(batch, pfb, false); emit_common_init(batch); } +template static void set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) { @@ -933,8 +947,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1); OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1)); - OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1); - OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1)); + OUT_REG(ring, SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1)); OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1); OUT_RING(ring, @@ -942,6 +955,7 @@ set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) } /* before mem2gmem */ +template static void fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) { @@ -984,10 +998,10 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x0); - set_window_offset(ring, x1, y1); + set_window_offset(ring, x1, y1); const struct fd_gmem_stateobj *gmem = batch->gmem_state; - set_bin_size(ring, gmem, { + set_bin_size(ring, gmem, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_GMEM, .lrz_feedback_zmode_mask = 0x6, @@ -996,7 +1010,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) OUT_PKT7(ring, CP_SET_MODE, 1); OUT_RING(ring, 0x0); } else { - set_window_offset(ring, x1, y1); + set_window_offset(ring, x1, y1); OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -1388,6 +1402,7 @@ fd6_unknown_8c01(enum pipe_format format, unsigned buffers) return 0; } +template static void emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base, struct pipe_surface *psurf, @@ -1412,7 +1427,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us * just do a restore of the other channel for partial packed z/s writes. */ - fd6_resolve_tile(batch, ring, base, psurf, 0); + fd6_resolve_tile(batch, ring, base, psurf, 0); return; } @@ -1442,6 +1457,7 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, * transfer from gmem to system memory (ie. normal RAM) */ +template static void prepare_tile_fini_ib(struct fd_batch *batch) assert_dt { @@ -1459,12 +1475,12 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) { - emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf, - FD_BUFFER_DEPTH); + emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], + pfb->zsbuf, FD_BUFFER_DEPTH); } if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) { - emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf, - FD_BUFFER_STENCIL); + emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], + pfb->zsbuf, FD_BUFFER_STENCIL); } } @@ -1475,8 +1491,8 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt continue; if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], - FD_BUFFER_COLOR); + emit_resolve_blit(batch, ring, gmem->cbuf_base[i], + pfb->cbufs[i], FD_BUFFER_COLOR); } } } @@ -1546,6 +1562,7 @@ fd6_emit_tile_fini(struct fd_batch *batch) } } +template static void emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt { @@ -1572,7 +1589,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) continue; - fd6_clear_surface(ctx, ring, pfb->cbufs[i], &box2d, &color, 0); + fd6_clear_surface(ctx, ring, pfb->cbufs[i], &box2d, &color, 0); } } if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { @@ -1587,8 +1604,8 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { value.f[0] = batch->clear_depth; value.ui[1] = batch->clear_stencil; - fd6_clear_surface(ctx, ring, pfb->zsbuf, &box2d, - &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers)); + fd6_clear_surface(ctx, ring, pfb->zsbuf, &box2d, + &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers)); } if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { @@ -1598,7 +1615,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt stencil_surf.format = PIPE_FORMAT_S8_UINT; stencil_surf.texture = separate_stencil; - fd6_clear_surface(ctx, ring, &stencil_surf, &box2d, &value, 0); + fd6_clear_surface(ctx, ring, &stencil_surf, &box2d, &value, 0); } } @@ -1608,13 +1625,14 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt trace_end_clear_restore(&batch->trace, ring); } +template static void fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt { struct fd_ringbuffer *ring = batch->gmem; struct fd_screen *screen = batch->ctx->screen; - fd6_emit_restore(batch, ring); + fd6_emit_restore(batch, ring); fd6_emit_lrz_flush(ring); if (batch->prologue) { @@ -1638,14 +1656,14 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt else set_scissor(ring, 0, 0, 0, 0); - set_window_offset(ring, 0, 0); + set_window_offset(ring, 0, 0); - set_bin_size(ring, NULL, { + set_bin_size(ring, NULL, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_SYSMEM, }); - emit_sysmem_clears(batch, ring); + emit_sysmem_clears(batch, ring); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); @@ -1671,12 +1689,12 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); - emit_zs(ring, pfb->zsbuf, NULL); - emit_mrt(ring, pfb, NULL); + emit_zs(ring, pfb->zsbuf, NULL); + emit_mrt(ring, pfb, NULL); emit_msaa(ring, pfb->samples); patch_fb_read_sysmem(batch); - update_render_cntl(batch, pfb, false); + update_render_cntl(batch, pfb, false); emit_common_init(batch); } @@ -1704,18 +1722,24 @@ fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt fd_wfi(batch, ring); } +template void -fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis +fd6_gmem_init(struct pipe_context *pctx) + disable_thread_safety_analysis { struct fd_context *ctx = fd_context(pctx); - ctx->emit_tile_init = fd6_emit_tile_init; - ctx->emit_tile_prep = fd6_emit_tile_prep; + ctx->emit_tile_init = fd6_emit_tile_init; + ctx->emit_tile_prep = fd6_emit_tile_prep; ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem; ctx->emit_tile_renderprep = fd6_emit_tile_renderprep; ctx->emit_tile = fd6_emit_tile; ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem; ctx->emit_tile_fini = fd6_emit_tile_fini; - ctx->emit_sysmem_prep = fd6_emit_sysmem_prep; + ctx->emit_sysmem_prep = fd6_emit_sysmem_prep; ctx->emit_sysmem_fini = fd6_emit_sysmem_fini; } + +/* Teach the compiler about needed variants: */ +template void fd6_gmem_init(struct pipe_context *pctx); +template void fd6_gmem_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h index 74b68bd..bb596f9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h @@ -30,7 +30,7 @@ #include "pipe/p_context.h" -EXTERNC +template void fd6_gmem_init(struct pipe_context *pctx); #endif /* FD6_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc index b66f218..7e1a371 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc @@ -169,6 +169,7 @@ validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *se } /* Build bindless descriptor state, returns ownership of state reference */ +template struct fd_ringbuffer * fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read) @@ -258,8 +259,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, unsigned idx = ir3_shader_descriptor_set(shader); if (shader == PIPE_SHADER_COMPUTE) { - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.cs_bindless = 0x1f)); - OUT_REG(ring, A6XX_SP_CS_BINDLESS_BASE_DESCRIPTOR( + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .cs_bindless = 0x1f)); + OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP, idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, )); OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR( @@ -300,8 +301,8 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, ); } } else { - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.gfx_bindless = 0x1f)); - OUT_REG(ring, A6XX_SP_BINDLESS_BASE_DESCRIPTOR( + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .gfx_bindless = 0x1f)); + OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP, idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, )); OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR( @@ -346,6 +347,9 @@ fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, return ring; } +template struct fd_ringbuffer *fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read); +template struct fd_ringbuffer *fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read); + static void fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h index 11d25e2..a0e03c9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h @@ -30,14 +30,11 @@ #include "freedreno_context.h" -BEGINC; - +template struct fd_ringbuffer * fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, bool append_fb_read) assert_dt; void fd6_image_init(struct pipe_context *pctx); -ENDC; - #endif /* FD6_IMAGE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index 99c7c94..a85c953 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -305,33 +305,44 @@ sp_xs_config(struct ir3_shader_variant *v) A6XX_SP_VS_CONFIG_NSAMP(v->num_samp); } +template static void setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state) { struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 100 * 4); - OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, + OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true, .ds_state = true, .gs_state = true, .fs_state = true, .cs_state = true, .cs_ibo = true, .gfx_ibo = true, )); assert(state->vs->constlen >= state->bs->constlen); - OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); - OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) | - A6XX_HLSQ_VS_CNTL_ENABLED); - OUT_RING(ring, COND(state->hs, - A6XX_HLSQ_HS_CNTL_ENABLED | - A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen))); - OUT_RING(ring, COND(state->ds, - A6XX_HLSQ_DS_CNTL_ENABLED | - A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen))); - OUT_RING(ring, COND(state->gs, - A6XX_HLSQ_GS_CNTL_ENABLED | - A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen))); - OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) | - A6XX_HLSQ_FS_CNTL_ENABLED); + OUT_REG(ring, HLSQ_VS_CNTL( + CHIP, + .constlen = state->vs->constlen, + .enabled = true, + )); + OUT_REG(ring, HLSQ_HS_CNTL( + CHIP, + .constlen = COND(state->hs, state->hs->constlen), + .enabled = COND(state->hs, true), + )); + OUT_REG(ring, HLSQ_DS_CNTL( + CHIP, + .constlen = COND(state->ds, state->ds->constlen), + .enabled = COND(state->ds, true), + )); + OUT_REG(ring, HLSQ_GS_CNTL( + CHIP, + .constlen = COND(state->gs, state->gs->constlen), + .enabled = COND(state->gs, true), + )); + OUT_REG(ring, HLSQ_FS_CNTL( + CHIP, + .constlen = state->fs->constlen, + .enabled = true, + )); OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); OUT_RING(ring, sp_xs_config(state->vs)); @@ -397,6 +408,7 @@ tex_opc_to_prefetch_cmd(opc_t tex_opc) } } +template static void setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, struct fd6_program_state *state, @@ -549,16 +561,18 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE)); for (int i = 0; i < fs->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; - OUT_RING(ring, - A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) | - A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) | - A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) | - A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) | - A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) | - COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) | - COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) | - A6XX_SP_FS_PREFETCH_CMD_CMD( - tex_opc_to_prefetch_cmd(prefetch->tex_opc))); + OUT_RING(ring, SP_FS_PREFETCH_CMD( + CHIP, i, + .src = prefetch->src, + .samp_id = prefetch->samp_id, + .tex_id = prefetch->tex_id, + .dst = prefetch->dst, + .wrmask = prefetch->wrmask, + .half = prefetch->half_precision, + .bindless = prefetch->bindless, + .cmd = tex_opc_to_prefetch_cmd(prefetch->tex_opc), + ).value + ); } OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1); @@ -864,31 +878,43 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); - OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5); - OUT_RING(ring, 0x7); /* XXX */ - OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) | - A6XX_HLSQ_CONTROL_2_REG_CENTERRHW(ij_regid[IJ_PERSP_CENTER_RHW])); - OUT_RING( - ring, - A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID( - ij_regid[IJ_PERSP_CENTROID]) | - A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID( - ij_regid[IJ_LINEAR_CENTROID])); - OUT_RING( - ring, - A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | - A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | - A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) | - A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE])); - OUT_RING(ring, 0xfcfc); /* line length (?), foveation quality */ - - OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1); - OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(fssz) | - COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS)); + OUT_REG(ring, + HLSQ_CONTROL_1_REG(CHIP, 0x7), /* XXX */ + HLSQ_CONTROL_2_REG( + CHIP, + .faceregid = face_regid, + .sampleid = samp_id_regid, + .samplemask = smask_in_regid, + .centerrhw = ij_regid[IJ_PERSP_CENTER_RHW], + ), + HLSQ_CONTROL_3_REG( + CHIP, + .ij_persp_pixel = ij_regid[IJ_PERSP_PIXEL], + .ij_linear_pixel = ij_regid[IJ_LINEAR_PIXEL], + .ij_persp_centroid = ij_regid[IJ_PERSP_CENTROID], + .ij_linear_centroid = ij_regid[IJ_LINEAR_CENTROID], + ), + HLSQ_CONTROL_4_REG( + CHIP, + .ij_persp_sample = ij_regid[IJ_PERSP_SAMPLE], + .ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE], + .xycoordregid = coord_regid, + .zwcoordregid = zwcoord_regid, + ), + HLSQ_CONTROL_5_REG( + CHIP, + .linelengthregid = INVALID_REG, + .foveationqualityregid = INVALID_REG, + ), + ); + + OUT_REG(ring, + HLSQ_FS_CNTL_0( + CHIP, + .threadsize = fssz, + .varyings = enable_varyings, + ), + ); OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1); OUT_RING( @@ -1287,6 +1313,7 @@ emit_interp_state(struct fd_ringbuffer *ring, const struct fd6_program_state *st OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ } +template static struct ir3_program_state * fd6_program_create(void *data, struct ir3_shader_variant *bs, struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, @@ -1336,9 +1363,9 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, fd_screen_unlock(screen); } - setup_config_stateobj(ctx, state); - setup_stateobj(state->binning_stateobj, ctx, state, key, true); - setup_stateobj(state->stateobj, ctx, state, key, false); + setup_config_stateobj(ctx, state); + setup_stateobj(state->binning_stateobj, ctx, state, key, true); + setup_stateobj(state->stateobj, ctx, state, key, false); state->interp_stateobj = create_interp_stateobj(ctx, state); const struct ir3_stream_output_info *stream_output = @@ -1403,19 +1430,25 @@ fd6_program_destroy(void *data, struct ir3_program_state *state) free(so); } +template static const struct ir3_cache_funcs cache_funcs = { - .create_state = fd6_program_create, + .create_state = fd6_program_create, .destroy_state = fd6_program_destroy, }; +template void fd6_prog_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); - ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); + ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); ir3_prog_init(pctx); fd_prog_init(pctx); } + +/* Teach the compiler about needed variants: */ +template void fd6_prog_init(struct pipe_context *pctx); +template void fd6_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index e10898b..e7ae12f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -99,15 +99,12 @@ fd6_last_shader(const struct fd6_program_state *state) return state->vs; } -BEGINC; - void fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) assert_dt; struct fd_ringbuffer *fd6_program_interp_state(struct fd6_emit *emit) assert_dt; +template void fd6_prog_init(struct pipe_context *pctx); -ENDC; - #endif /* FD6_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc index 466cf8d..d636a9e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc @@ -35,6 +35,7 @@ #include "fd6_pack.h" #include "fd6_rasterizer.h" +template struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, @@ -102,7 +103,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, } OUT_REG(ring, A6XX_VPC_POLYGON_MODE(mode)); - OUT_REG(ring, A6XX_PC_POLYGON_MODE(mode)); + OUT_REG(ring, PC_POLYGON_MODE(CHIP, mode)); if (ctx->screen->info->a6xx.has_shading_rate) { OUT_REG(ring, A6XX_RB_UNKNOWN_8A00()); @@ -114,6 +115,9 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, return ring; } +template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); +template struct fd_ringbuffer *__fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); + void * fd6_rasterizer_state_create(struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h index 32db99b..800ee1a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h @@ -33,8 +33,6 @@ #include "freedreno_context.h" -BEGINC; - struct fd6_rasterizer_stateobj { struct pipe_rasterizer_state base; @@ -51,11 +49,13 @@ void *fd6_rasterizer_state_create(struct pipe_context *pctx, const struct pipe_rasterizer_state *cso); void fd6_rasterizer_state_delete(struct pipe_context *, void *hwcso); +template struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, const struct pipe_rasterizer_state *cso, bool primitive_restart); +template static inline struct fd_ringbuffer * fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt { @@ -64,13 +64,11 @@ fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart) assert_dt unsigned variant = primitive_restart; if (unlikely(!rasterizer->stateobjs[variant])) { - rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj( + rasterizer->stateobjs[variant] = __fd6_setup_rasterizer_stateobj( ctx, ctx->rasterizer, primitive_restart); } return rasterizer->stateobjs[variant]; } -ENDC; - #endif /* FD6_RASTERIZER_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc index 010a151..abb7cab 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc @@ -169,7 +169,11 @@ fd6_screen_init(struct pipe_screen *pscreen) FD_GMEM_DEPTH_ENABLED | FD_GMEM_STENCIL_ENABLED | FD_GMEM_BLEND_ENABLED | FD_GMEM_LOGICOP_ENABLED); - pscreen->context_create = fd6_context_create; + if (screen->gen == 7) { + pscreen->context_create = fd6_context_create; + } else { + pscreen->context_create = fd6_context_create; + } pscreen->is_format_supported = fd6_screen_is_format_supported; screen->tile_mode = fd6_tile_mode; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.h b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h index 5d65683..96d8330 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h @@ -28,7 +28,7 @@ #ifndef FD6_SCREEN_H_ #define FD6_SCREEN_H_ -#include "pipe/p_screen.h" +#include "freedreno_screen.h" EXTERNC void fd6_screen_init(struct pipe_screen *pscreen); -- 2.7.4