From b02f97c87559256086a935836abae3f67ad478c6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Mar 2020 16:20:18 +0100 Subject: [PATCH] panfrost: Prepare shader_meta descriptors at emission time This way we avoid potential state leaks and keep the shader_meta initialization in one place. The time spent preparing the shader descriptors should be negligible compared to the time spent pushing those descriptors to the transient buffer (remember we are writing to non-cacheable memory here). Note that we might get back to some sort of shader_meta descriptor caching at some point if that proves necessary, but we now have those panfrost_frag_meta_xxx_update() helpers, where xxx maps directly to a CSO bind, which should ease desc template updates. Signed-off-by: Boris Brezillon Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 23 +- src/gallium/drivers/panfrost/pan_blend_cso.c | 5 - src/gallium/drivers/panfrost/pan_cmdstream.c | 361 ++++++++++++++++++++++++++- src/gallium/drivers/panfrost/pan_cmdstream.h | 3 + src/gallium/drivers/panfrost/pan_compute.c | 9 +- src/gallium/drivers/panfrost/pan_context.c | 352 +------------------------- src/gallium/drivers/panfrost/pan_context.h | 25 +- 7 files changed, 388 insertions(+), 390 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 2ceb5ce..f8874ce 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -37,14 +37,12 @@ #include "tgsi/tgsi_dump.h" void -panfrost_shader_compile( - struct panfrost_context *ctx, - struct mali_shader_meta *meta, - enum pipe_shader_ir ir_type, - const void *ir, - gl_shader_stage stage, - struct panfrost_shader_state *state, - uint64_t *outputs_written) +panfrost_shader_compile(struct panfrost_context *ctx, + enum pipe_shader_ir ir_type, + const void *ir, + gl_shader_stage stage, + struct panfrost_shader_state *state, + uint64_t 
*outputs_written) { struct panfrost_screen *screen = pan_screen(ctx->base.screen); uint8_t *dst; @@ -80,11 +78,9 @@ panfrost_shader_compile( if (size) { state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE); memcpy(state->bo->cpu, dst, size); - meta->shader = state->bo->gpu | program.first_tag; state->first_tag = program.first_tag; } else { /* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */ - meta->shader = 0x0 | 1; state->first_tag = 1; } @@ -95,9 +91,6 @@ panfrost_shader_compile( state->sysval_count = program.sysval_count; memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count); - meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff); - meta->midgard1.work_count = program.work_register_count; - bool vertex_id = s->info.system_values_read & (1 << SYSTEM_VALUE_VERTEX_ID); bool instance_id = s->info.system_values_read & (1 << SYSTEM_VALUE_INSTANCE_ID); @@ -145,10 +138,6 @@ panfrost_shader_compile( state->uniform_cutoff = program.uniform_cutoff; state->work_reg_count = program.work_register_count; - meta->attribute_count = state->attribute_count; - meta->varying_count = state->varying_count; - meta->midgard1.flags_hi = 8; /* XXX */ - unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1); unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2); unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4); diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 8b4635c..d0824ed 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -144,17 +144,12 @@ panfrost_bind_blend_state(struct pipe_context *pipe, void *cso) { struct panfrost_context *ctx = pan_context(pipe); - struct panfrost_screen *screen = pan_screen(ctx->base.screen); struct pipe_blend_state *blend = (struct pipe_blend_state *) cso; struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso; 
ctx->blend = pblend; if (!blend) return; - - if (screen->quirks & MIDGARD_SFBD) { - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither); - } } static void diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index b6b569c..ddef5c1 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -99,6 +99,333 @@ panfrost_vt_update_occlusion_query(struct panfrost_context *ctx, tp->postfix.occlusion_counter = 0; } +static void +panfrost_shader_meta_init(struct panfrost_context *ctx, + enum pipe_shader_type st, + struct mali_shader_meta *meta) +{ + struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st); + + memset(meta, 0, sizeof(*meta)); + meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag; + meta->midgard1.uniform_count = MIN2(ss->uniform_count, + ss->uniform_cutoff); + meta->midgard1.work_count = ss->work_reg_count; + meta->attribute_count = ss->attribute_count; + meta->varying_count = ss->varying_count; + meta->midgard1.flags_hi = 0x8; /* XXX */ + meta->midgard1.flags_lo = 0x220; + meta->texture_count = ctx->sampler_view_count[st]; + meta->sampler_count = ctx->sampler_count[st]; + meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st); +} + +unsigned +panfrost_translate_compare_func(enum pipe_compare_func in) +{ + switch (in) { + case PIPE_FUNC_NEVER: + return MALI_FUNC_NEVER; + + case PIPE_FUNC_LESS: + return MALI_FUNC_LESS; + + case PIPE_FUNC_EQUAL: + return MALI_FUNC_EQUAL; + + case PIPE_FUNC_LEQUAL: + return MALI_FUNC_LEQUAL; + + case PIPE_FUNC_GREATER: + return MALI_FUNC_GREATER; + + case PIPE_FUNC_NOTEQUAL: + return MALI_FUNC_NOTEQUAL; + + case PIPE_FUNC_GEQUAL: + return MALI_FUNC_GEQUAL; + + case PIPE_FUNC_ALWAYS: + return MALI_FUNC_ALWAYS; + + default: + unreachable("Invalid func"); + } +} + +static unsigned +panfrost_translate_stencil_op(enum pipe_stencil_op in) +{ + switch (in) { + case 
PIPE_STENCIL_OP_KEEP: + return MALI_STENCIL_KEEP; + + case PIPE_STENCIL_OP_ZERO: + return MALI_STENCIL_ZERO; + + case PIPE_STENCIL_OP_REPLACE: + return MALI_STENCIL_REPLACE; + + case PIPE_STENCIL_OP_INCR: + return MALI_STENCIL_INCR; + + case PIPE_STENCIL_OP_DECR: + return MALI_STENCIL_DECR; + + case PIPE_STENCIL_OP_INCR_WRAP: + return MALI_STENCIL_INCR_WRAP; + + case PIPE_STENCIL_OP_DECR_WRAP: + return MALI_STENCIL_DECR_WRAP; + + case PIPE_STENCIL_OP_INVERT: + return MALI_STENCIL_INVERT; + + default: + unreachable("Invalid stencil op"); + } +} + +static void +panfrost_make_stencil_state(const struct pipe_stencil_state *in, + struct mali_stencil_test *out) +{ + out->ref = 0; /* Gallium gets it from elsewhere */ + + out->mask = in->valuemask; + out->func = panfrost_translate_compare_func(in->func); + out->sfail = panfrost_translate_stencil_op(in->fail_op); + out->dpfail = panfrost_translate_stencil_op(in->zfail_op); + out->dppass = panfrost_translate_stencil_op(in->zpass_op); +} + +static void +panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx, + struct mali_shader_meta *fragmeta) +{ + if (!ctx->rasterizer) { + SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true); + SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false); + fragmeta->depth_units = 0.0f; + fragmeta->depth_factor = 0.0f; + SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false); + SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false); + return; + } + + bool msaa = ctx->rasterizer->base.multisample; + + /* TODO: Sample size */ + SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa); + SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa); + fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f; + fragmeta->depth_factor = ctx->rasterizer->base.offset_scale; + + /* XXX: Which bit is which? Does this maybe allow offseting not-tri? 
*/ + + SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, + ctx->rasterizer->base.offset_tri); + SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, + ctx->rasterizer->base.offset_tri); +} + +static void +panfrost_frag_meta_zsa_update(struct panfrost_context *ctx, + struct mali_shader_meta *fragmeta) +{ + const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil; + int zfunc = PIPE_FUNC_ALWAYS; + + if (!zsa) { + struct pipe_stencil_state default_stencil = { + .enabled = 0, + .func = PIPE_FUNC_ALWAYS, + .fail_op = PIPE_STENCIL_OP_KEEP, + .zfail_op = PIPE_STENCIL_OP_KEEP, + .zpass_op = PIPE_STENCIL_OP_KEEP, + .writemask = 0xFF, + .valuemask = 0xFF + }; + + panfrost_make_stencil_state(&default_stencil, + &fragmeta->stencil_front); + fragmeta->stencil_mask_front = default_stencil.writemask; + fragmeta->stencil_back = fragmeta->stencil_front; + fragmeta->stencil_mask_back = default_stencil.writemask; + SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false); + SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false); + } else { + SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, + zsa->stencil[0].enabled); + panfrost_make_stencil_state(&zsa->stencil[0], + &fragmeta->stencil_front); + fragmeta->stencil_mask_front = zsa->stencil[0].writemask; + fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0]; + + /* If back-stencil is not enabled, use the front values */ + + if (zsa->stencil[1].enabled) { + panfrost_make_stencil_state(&zsa->stencil[1], + &fragmeta->stencil_back); + fragmeta->stencil_mask_back = zsa->stencil[1].writemask; + fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1]; + } else { + fragmeta->stencil_back = fragmeta->stencil_front; + fragmeta->stencil_mask_back = fragmeta->stencil_mask_front; + fragmeta->stencil_back.ref = fragmeta->stencil_front.ref; + } + + if (zsa->depth.enabled) + zfunc = zsa->depth.func; + + /* Depth state (TODO: Refactor) */ + + SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, + zsa->depth.writemask); + } + + 
fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; + fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc)); +} + +static void +panfrost_frag_meta_blend_update(struct panfrost_context *ctx, + struct mali_shader_meta *fragmeta, + struct midgard_blend_rt *rts) +{ + const struct panfrost_screen *screen = pan_screen(ctx->base.screen); + + SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER, + (screen->quirks & MIDGARD_SFBD) && ctx->blend && + !ctx->blend->base.dither); + + /* Get blending setup */ + unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); + + struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; + unsigned shader_offset = 0; + struct panfrost_bo *shader_bo = NULL; + + for (unsigned c = 0; c < rt_count; ++c) + blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, + &shader_offset); + + /* If there is a blend shader, work registers are shared. XXX: opt */ + + for (unsigned c = 0; c < rt_count; ++c) { + if (blend[c].is_shader) + fragmeta->midgard1.work_count = 16; + } + + /* Even on MFBD, the shader descriptor gets blend shaders. It's *also* + * copied to the blend_meta appended (by convention), but this is the + * field actually read by the hardware. (Or maybe both are read...?). + * Specify the last RTi with a blend shader. */ + + fragmeta->blend.shader = 0; + + for (signed rt = (rt_count - 1); rt >= 0; --rt) { + if (!blend[rt].is_shader) + continue; + + fragmeta->blend.shader = blend[rt].shader.gpu | + blend[rt].shader.first_tag; + break; + } + + if (screen->quirks & MIDGARD_SFBD) { + /* When only a single render target platform is used, the blend + * information is inside the shader meta itself. 
We additionally + * need to signal CAN_DISCARD for nontrivial blend modes (so + * we're able to read back the destination buffer) */ + + SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER, + blend[0].is_shader); + + if (!blend[0].is_shader) { + fragmeta->blend.equation = *blend[0].equation.equation; + fragmeta->blend.constant = blend[0].equation.constant; + } + + SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, + !blend[0].no_blending); + return; + } + + /* Additional blend descriptor tacked on for jobs using MFBD */ + + for (unsigned i = 0; i < rt_count; ++i) { + rts[i].flags = 0x200; + + bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) && + (ctx->pipe_framebuffer.cbufs[i]) && + util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format); + + SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader); + SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending); + SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); + SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); + + if (blend[i].is_shader) { + rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; + } else { + rts[i].blend.equation = *blend[i].equation.equation; + rts[i].blend.constant = blend[i].equation.constant; + } + } +} + +static void +panfrost_frag_shader_meta_init(struct panfrost_context *ctx, + struct mali_shader_meta *fragmeta, + struct midgard_blend_rt *rts) +{ + const struct panfrost_screen *screen = pan_screen(ctx->base.screen); + struct panfrost_shader_state *fs; + + fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); + + fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000); + fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010; + fragmeta->unknown2_4 = 0x4e0; + + /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this + * is required (independent of 32-bit/64-bit descriptors), or why it's + * not used on later GPU revisions. 
Otherwise, all shader jobs fault on + * these earlier chips (perhaps this is a chicken bit of some kind). + * More investigation is needed. */ + + SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD); + + /* Depending on whether it's legal to in the given shader, we try to + * enable early-z testing (or forward-pixel kill?) */ + + SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z, + !fs->can_discard && !fs->writes_depth); + + /* Add the writes Z/S flags if needed. */ + SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth); + SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil); + + /* Any time texturing is used, derivatives are implicitly calculated, + * so we need to enable helper invocations */ + + SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS, + fs->helper_invocations); + + /* CAN_DISCARD should be set if the fragment shader possibly contains a + * 'discard' instruction. It is likely this is related to optimizations + * related to forward-pixel kill, as per "Mali Performance 3: Is + * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */ + + SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard); + SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard); + + panfrost_frag_meta_rasterizer_update(ctx, fragmeta); + panfrost_frag_meta_zsa_update(ctx, fragmeta); + panfrost_frag_meta_blend_update(ctx, fragmeta, rts); +} + void panfrost_emit_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type st, @@ -112,14 +439,44 @@ panfrost_emit_shader_meta(struct panfrost_batch *batch, return; } + struct mali_shader_meta meta; + + panfrost_shader_meta_init(ctx, st, &meta); + /* Add the shader BO to the batch. 
*/ panfrost_batch_add_bo(batch, ss->bo, PAN_BO_ACCESS_PRIVATE | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe, - sizeof(*ss->tripipe)); + mali_ptr shader_ptr; + + if (st == PIPE_SHADER_FRAGMENT) { + struct panfrost_screen *screen = pan_screen(ctx->base.screen); + unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); + size_t desc_size = sizeof(meta); + struct midgard_blend_rt rts[4]; + struct panfrost_transfer xfer; + + assert(rt_count <= ARRAY_SIZE(rts)); + + panfrost_frag_shader_meta_init(ctx, &meta, rts); + + if (!(screen->quirks & MIDGARD_SFBD)) + desc_size += sizeof(*rts) * rt_count; + + xfer = panfrost_allocate_transient(batch, desc_size); + + memcpy(xfer.cpu, &meta, sizeof(meta)); + /* rts[] is only filled in and allocated for on MFBD; copy nothing on SFBD */ + memcpy(xfer.cpu + sizeof(meta), rts, desc_size - sizeof(meta)); + shader_ptr = xfer.gpu; + } else { + shader_ptr = panfrost_upload_transient(batch, &meta, + sizeof(meta)); + } + + vtp->postfix.shader = shader_ptr; } static void diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index 2fa088b..f16e1bb 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -32,6 +32,9 @@ #include "pan_job.h" +unsigned +panfrost_translate_compare_func(enum pipe_compare_func in); + void panfrost_vt_attach_framebuffer(struct panfrost_context *ctx, struct midgard_payload_vertex_tiler *vt); diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index d05016e..d67e87f 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -54,9 +54,6 @@ panfrost_create_compute_state( so->variant_count = 1; so->active_variant = 0; - /* calloc, instead of malloc - to zero unused fields */ - v->tripipe = CALLOC_STRUCT(mali_shader_meta); - if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) { struct blob_reader reader; const struct 
pipe_binary_program_header *hdr = cso->prog; @@ -66,9 +63,8 @@ panfrost_create_compute_state( so->cbase.ir_type = PIPE_SHADER_IR_NIR; } - panfrost_shader_compile(ctx, v->tripipe, - so->cbase.ir_type, so->cbase.prog, - MESA_SHADER_COMPUTE, v, NULL); + panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog, + MESA_SHADER_COMPUTE, v, NULL); return so; } @@ -121,7 +117,6 @@ panfrost_launch_grid(struct pipe_context *pipe, if (info->input) pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf); - panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE); panfrost_emit_shader_meta(batch, PIPE_SHADER_COMPUTE, payload); panfrost_emit_const_buf(batch, PIPE_SHADER_COMPUTE, payload); panfrost_emit_shared_memory(batch, info, payload); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 5904645..0f4bca7 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -207,126 +207,6 @@ translate_tex_wrap(enum pipe_tex_wrap w) } } -static unsigned -panfrost_translate_compare_func(enum pipe_compare_func in) -{ - switch (in) { - case PIPE_FUNC_NEVER: - return MALI_FUNC_NEVER; - - case PIPE_FUNC_LESS: - return MALI_FUNC_LESS; - - case PIPE_FUNC_EQUAL: - return MALI_FUNC_EQUAL; - - case PIPE_FUNC_LEQUAL: - return MALI_FUNC_LEQUAL; - - case PIPE_FUNC_GREATER: - return MALI_FUNC_GREATER; - - case PIPE_FUNC_NOTEQUAL: - return MALI_FUNC_NOTEQUAL; - - case PIPE_FUNC_GEQUAL: - return MALI_FUNC_GEQUAL; - - case PIPE_FUNC_ALWAYS: - return MALI_FUNC_ALWAYS; - - default: - unreachable("Invalid func"); - } -} - -static unsigned -panfrost_translate_stencil_op(enum pipe_stencil_op in) -{ - switch (in) { - case PIPE_STENCIL_OP_KEEP: - return MALI_STENCIL_KEEP; - - case PIPE_STENCIL_OP_ZERO: - return MALI_STENCIL_ZERO; - - case PIPE_STENCIL_OP_REPLACE: - return MALI_STENCIL_REPLACE; - - case PIPE_STENCIL_OP_INCR: - return MALI_STENCIL_INCR; - - case PIPE_STENCIL_OP_DECR: - return 
MALI_STENCIL_DECR; - - case PIPE_STENCIL_OP_INCR_WRAP: - return MALI_STENCIL_INCR_WRAP; - - case PIPE_STENCIL_OP_DECR_WRAP: - return MALI_STENCIL_DECR_WRAP; - - case PIPE_STENCIL_OP_INVERT: - return MALI_STENCIL_INVERT; - - default: - unreachable("Invalid stencil op"); - } -} - -static void -panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out) -{ - out->ref = 0; /* Gallium gets it from elsewhere */ - - out->mask = in->valuemask; - out->func = panfrost_translate_compare_func(in->func); - out->sfail = panfrost_translate_stencil_op(in->fail_op); - out->dpfail = panfrost_translate_stencil_op(in->zfail_op); - out->dppass = panfrost_translate_stencil_op(in->zpass_op); -} - -static void -panfrost_default_shader_backend(struct panfrost_context *ctx) -{ - struct panfrost_screen *screen = pan_screen(ctx->base.screen); - struct mali_shader_meta shader = { - .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), - - .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010, - .unknown2_4 = MALI_NO_MSAA | 0x4e0, - }; - - /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this is - * required (independent of 32-bit/64-bit descriptors), or why it's not - * used on later GPU revisions. Otherwise, all shader jobs fault on - * these earlier chips (perhaps this is a chicken bit of some kind). - * More investigation is needed. 
*/ - - if (screen->quirks & MIDGARD_SFBD) - shader.unknown2_4 |= 0x10; - - struct pipe_stencil_state default_stencil = { - .enabled = 0, - .func = PIPE_FUNC_ALWAYS, - .fail_op = MALI_STENCIL_KEEP, - .zfail_op = MALI_STENCIL_KEEP, - .zpass_op = MALI_STENCIL_KEEP, - .writemask = 0xFF, - .valuemask = 0xFF - }; - - panfrost_make_stencil_state(&default_stencil, &shader.stencil_front); - shader.stencil_mask_front = default_stencil.writemask; - - panfrost_make_stencil_state(&default_stencil, &shader.stencil_back); - shader.stencil_mask_back = default_stencil.writemask; - - if (default_stencil.enabled) - shader.unknown2_4 |= MALI_STENCIL_TEST; - - memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); -} - bool panfrost_writes_point_size(struct panfrost_context *ctx) { @@ -503,33 +383,12 @@ panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage) return 32 - __builtin_clz(mask); } -/* Fixes up a shader state with current state */ - -void -panfrost_patch_shader_state(struct panfrost_context *ctx, - enum pipe_shader_type stage) -{ - struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, stage); - - if (!ss) - return; - - ss->tripipe->texture_count = ctx->sampler_view_count[stage]; - ss->tripipe->sampler_count = ctx->sampler_count[stage]; - - ss->tripipe->midgard1.flags_lo = 0x220; - - unsigned ubo_count = panfrost_ubo_count(ctx, stage); - ss->tripipe->midgard1.uniform_buffer_count = ubo_count; -} - /* Go through dirty flags and actualise them in the cmdstream. 
*/ static void panfrost_emit_for_draw(struct panfrost_context *ctx) { struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct panfrost_screen *screen = pan_screen(ctx->base.screen); panfrost_batch_add_fbo_bos(batch); @@ -542,166 +401,15 @@ panfrost_emit_for_draw(struct panfrost_context *ctx) unsigned total_count = ctx->padded_count * ctx->instance_count; panfrost_emit_varying_descriptor(ctx, total_count); - if (ctx->rasterizer) { - bool msaa = ctx->rasterizer->base.multisample; - - /* TODO: Sample size */ - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa); - } - panfrost_batch_set_requirements(batch); panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]); panfrost_vt_update_occlusion_query(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]); - panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX); panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX, &ctx->payloads[PIPE_SHADER_VERTEX]); - - if (ctx->shader[PIPE_SHADER_FRAGMENT]) { - struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); - - panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT); - -#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name - - COPY(shader); - COPY(attribute_count); - COPY(varying_count); - COPY(texture_count); - COPY(sampler_count); - COPY(midgard1.uniform_count); - COPY(midgard1.uniform_buffer_count); - COPY(midgard1.work_count); - COPY(midgard1.flags_lo); - COPY(midgard1.flags_hi); - -#undef COPY - - /* Get blending setup */ - unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1); - - struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS]; - unsigned shader_offset = 0; - struct panfrost_bo *shader_bo = NULL; - - for (unsigned c = 0; c < rt_count; ++c) { - blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset); - } - - /* If there is a blend shader, work registers are shared. 
XXX: opt */ - - for (unsigned c = 0; c < rt_count; ++c) { - if (blend[c].is_shader) - ctx->fragment_shader_core.midgard1.work_count = 16; - } - - /* Depending on whether it's legal to in the given shader, we - * try to enable early-z testing (or forward-pixel kill?) */ - - SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_EARLY_Z, - !variant->can_discard && !variant->writes_depth); - - /* Add the writes Z/S flags if needed. */ - SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, - MALI_WRITES_Z, variant->writes_depth); - SET_BIT(ctx->fragment_shader_core.midgard1.flags_hi, - MALI_WRITES_S, variant->writes_stencil); - - /* Any time texturing is used, derivatives are implicitly - * calculated, so we need to enable helper invocations */ - - SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, MALI_HELPER_INVOCATIONS, variant->helper_invocations); - - /* Assign the stencil refs late */ - - unsigned front_ref = ctx->stencil_ref.ref_value[0]; - unsigned back_ref = ctx->stencil_ref.ref_value[1]; - bool back_enab = ctx->depth_stencil->stencil[1].enabled; - - ctx->fragment_shader_core.stencil_front.ref = front_ref; - ctx->fragment_shader_core.stencil_back.ref = back_enab ? back_ref : front_ref; - - /* CAN_DISCARD should be set if the fragment shader possibly - * contains a 'discard' instruction. It is likely this is - * related to optimizations related to forward-pixel kill, as - * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good - * thing?" by Peter Harris - */ - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, variant->can_discard); - SET_BIT(ctx->fragment_shader_core.midgard1.flags_lo, 0x400, variant->can_discard); - - /* Even on MFBD, the shader descriptor gets blend shaders. It's - * *also* copied to the blend_meta appended (by convention), - * but this is the field actually read by the hardware. (Or - * maybe both are read...?). Specify the last RTi with a blend - * shader. 
*/ - - ctx->fragment_shader_core.blend.shader = 0; - - for (signed rt = (rt_count - 1); rt >= 0; --rt) { - if (blend[rt].is_shader) { - ctx->fragment_shader_core.blend.shader = - blend[rt].shader.gpu | blend[rt].shader.first_tag; - break; - } - } - - if (screen->quirks & MIDGARD_SFBD) { - /* When only a single render target platform is used, the blend - * information is inside the shader meta itself. We - * additionally need to signal CAN_DISCARD for nontrivial blend - * modes (so we're able to read back the destination buffer) */ - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_BLEND_SHADER, blend[0].is_shader); - - if (!blend[0].is_shader) { - ctx->fragment_shader_core.blend.equation = - *blend[0].equation.equation; - ctx->fragment_shader_core.blend.constant = - blend[0].equation.constant; - } - - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_CAN_DISCARD, !blend[0].no_blending); - } - - size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count); - struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size); - memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); - - ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.shader = transfer.gpu; - - if (!(screen->quirks & MIDGARD_SFBD)) { - /* Additional blend descriptor tacked on for jobs using MFBD */ - - struct midgard_blend_rt rts[4]; - - for (unsigned i = 0; i < rt_count; ++i) { - rts[i].flags = 0x200; - - bool is_srgb = - (ctx->pipe_framebuffer.nr_cbufs > i) && - (ctx->pipe_framebuffer.cbufs[i]) && - util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format); - - SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader); - SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending); - SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb); - SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither); - - if (blend[i].is_shader) { - rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag; - } else 
{ - rts[i].blend.equation = *blend[i].equation.equation; - rts[i].blend.constant = blend[i].equation.constant; - } - } - - memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count); - } - } + panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT, + &ctx->payloads[PIPE_SHADER_FRAGMENT]); /* We stage to transient, so always dirty.. */ if (ctx->vertex) @@ -1110,17 +818,9 @@ panfrost_bind_rasterizer_state( if (!hwcso) return; - ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f; - ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale; - /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */ assert(ctx->rasterizer->base.offset_clamp == 0.0); - /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */ - - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_A, ctx->rasterizer->base.offset_tri); - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_B, ctx->rasterizer->base.offset_tri); - /* Point sprites are emulated */ struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); @@ -1184,15 +884,13 @@ panfrost_create_shader_state( if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) { struct panfrost_context *ctx = pan_context(pctx); - struct mali_shader_meta meta; struct panfrost_shader_state state; uint64_t outputs_written; - panfrost_shader_compile(ctx, &meta, - PIPE_SHADER_IR_NIR, - so->base.ir.nir, - tgsi_processor_to_shader_stage(stage), &state, - &outputs_written); + panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR, + so->base.ir.nir, + tgsi_processor_to_shader_stage(stage), + &state, &outputs_written); } return so; @@ -1440,9 +1138,6 @@ panfrost_bind_shader_state( PIPE_SPRITE_COORD_UPPER_LEFT; } } - - variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta)); - } /* Select this variant */ @@ -1456,12 +1151,12 @@ panfrost_bind_shader_state( if 
(!shader_state->compiled) { uint64_t outputs_written = 0; - panfrost_shader_compile(ctx, shader_state->tripipe, - variants->base.type, - variants->base.type == PIPE_SHADER_IR_NIR ? - variants->base.ir.nir : - variants->base.tokens, - tgsi_processor_to_shader_stage(type), shader_state, + panfrost_shader_compile(ctx, variants->base.type, + variants->base.type == PIPE_SHADER_IR_NIR ? + variants->base.ir.nir : + variants->base.tokens, + tgsi_processor_to_shader_stage(type), + shader_state, &outputs_written); shader_state->compiled = true; @@ -1753,28 +1448,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe, ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]); } - /* Stencil state */ - SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); - - panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front); - ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask; - - /* If back-stencil is not enabled, use the front values */ - bool back_enab = ctx->depth_stencil->stencil[1].enabled; - unsigned back_index = back_enab ? 1 : 0; - - panfrost_make_stencil_state(&depth_stencil->stencil[back_index], &ctx->fragment_shader_core.stencil_back); - ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask; - - /* Depth state (TODO: Refactor) */ - SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_WRITEMASK, - depth_stencil->depth.writemask); - - int func = depth_stencil->depth.enabled ? 
depth_stencil->depth.func : PIPE_FUNC_ALWAYS; - - ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; - ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func)); - /* Bounds test not implemented */ assert(!depth_stencil->depth.bounds_test); } @@ -2138,7 +1811,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_batch_init(ctx); panfrost_emit_vertex_payload(ctx); panfrost_invalidate_frame(ctx); - panfrost_default_shader_backend(ctx); return gallium; } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 2e0c445..b73883d 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -119,12 +119,6 @@ struct panfrost_context { /* Each draw has corresponding vertex and tiler payloads */ struct midgard_payload_vertex_tiler payloads[PIPE_SHADER_TYPES]; - /* The fragment shader binary itself is pointed here (for the tripipe) but - * also everything else in the shader core, including blending, the - * stencil/depth tests, etc. Refer to the presentations. 
*/ - - struct mali_shader_meta fragment_shader_core; - unsigned vertex_count; unsigned instance_count; enum pipe_prim_type active_prim; @@ -188,7 +182,6 @@ struct panfrost_rasterizer { struct panfrost_shader_state { /* Compiled, mapped descriptor, ready for the hardware */ bool compiled; - struct mali_shader_meta *tripipe; /* Non-descript information */ int uniform_count; @@ -296,10 +289,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx); bool panfrost_writes_point_size(struct panfrost_context *ctx); -void -panfrost_patch_shader_state(struct panfrost_context *ctx, - enum pipe_shader_type stage); - struct panfrost_transfer panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler); @@ -325,14 +314,12 @@ mali_ptr panfrost_fragment_job(struct panfrost_batch *batch, bool has_draws); void -panfrost_shader_compile( - struct panfrost_context *ctx, - struct mali_shader_meta *meta, - enum pipe_shader_ir ir_type, - const void *ir, - gl_shader_stage stage, - struct panfrost_shader_state *state, - uint64_t *outputs_written); +panfrost_shader_compile(struct panfrost_context *ctx, + enum pipe_shader_ir ir_type, + const void *ir, + gl_shader_stage stage, + struct panfrost_shader_state *state, + uint64_t *outputs_written); unsigned panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage); -- 2.7.4