From 7a57cfbed695c48915fdb3d7bec37505d3f18e81 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 25 Apr 2019 12:28:35 -0700 Subject: [PATCH] freedreno/a6xx: sample-shading support Enables: OES_sample_shading OES_sample_variables OES_shader_multisample_interpolation Signed-off-by: Rob Clark --- docs/features.txt | 6 +- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 4 +- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 4 +- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 76 ++++++++++++++++++------ src/gallium/drivers/freedreno/freedreno_screen.c | 4 ++ 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index f9aa5f0..c63afea 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -282,11 +282,11 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+, radeonsi, virgl GL_OES_geometry_shader DONE (i965/hsw+, nvc0) GL_OES_gpu_shader5 DONE (freedreno/a6xx, all drivers that support GL_ARB_gpu_shader5) GL_OES_primitive_bounding_box DONE (freedreno/a5xx+, i965/gen7+, nvc0) - GL_OES_sample_shading DONE (i965, nvc0, r600) - GL_OES_sample_variables DONE (i965, nvc0, r600) + GL_OES_sample_shading DONE (freedreno/a6xx, i965, nvc0, r600) + GL_OES_sample_variables DONE (freedreno/a6xx, i965, nvc0, r600) GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store) GL_OES_shader_io_blocks DONE (All drivers that support GLES 3.1) - GL_OES_shader_multisample_interpolation DONE (i965, nvc0, r600) + GL_OES_shader_multisample_interpolation DONE (freedreno/a6xx, i965, nvc0, r600) GL_OES_tessellation_shader DONE (all drivers that support GL_ARB_tessellation_shader) GL_OES_texture_border_clamp DONE (all drivers) GL_OES_texture_buffer DONE (freedreno, i965, nvc0, softpipe) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 5cd619a..767312c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -167,7 +167,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, .fsaturate_r = fd6_ctx->fsaturate_r, .vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples, .fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples, - } + .sample_shading = (ctx->min_samples > 1), + .msaa = (ctx->framebuffer.samples > 1), + }, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index c9cf267..7b8184d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -868,7 +868,9 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) nr = 0; OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); - OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z)); + OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) | + COND(fp->writes_smask && pfb->samples > 1, + A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK)); OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr)); OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 94c725f..3f8cdb3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -300,19 +300,23 @@ next_regid(uint32_t reg, uint32_t increment) #define CONDREG(r, val) COND(VALIDREG(r), (val)) static void -setup_stateobj(struct fd_ringbuffer *ring, - struct fd6_program_state *state, bool binning_pass) +setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, + const struct ir3_shader_key *key, bool binning_pass) { struct stage s[MAX_STAGES]; uint32_t pos_regid, psize_regid, color_regid[8], posz_regid; - uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, samp_mask_regid; - uint32_t vcoord_regid, vertex_regid, instance_regid; + uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; + uint32_t smask_in_regid, smask_regid; + uint32_t vertex_regid, instance_regid; + uint32_t ij_pix_regid, ij_samp_regid, ij_cent_regid, ij_size_regid; enum a3xx_threadsize fssz; uint8_t psize_loc = ~0; int i, j; setup_stages(state, s, binning_pass); + bool sample_shading = s[FS].v->per_samp | key->sample_shading; + fssz = FOUR_QUADS; pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); @@ -336,12 +340,22 @@ setup_stateobj(struct fd_ringbuffer *ring, } samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); - samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); + smask_in_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); zwcoord_regid = next_regid(coord_regid, 2); - vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PIXEL); + ij_pix_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PIXEL); + ij_samp_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_SAMPLE); + ij_cent_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_CENTROID); + ij_size_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_SIZE); posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); + smask_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_SAMPLE_MASK); + + /* we can't write gl_SampleMask for !msaa.. if b0 is zero then we + * end up masking the single sample!! + */ + if (!key->msaa) + smask_regid = regid(63, 0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -390,7 +404,8 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1); OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) | - 0xfcfc0000); + A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) | + 0xfc000000); OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(s[VS].constlen) | @@ -510,13 +525,15 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_RING(ring, 0x7); /* XXX */ OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samp_mask_regid) | - 0xfc000000); /* XXX */ - OUT_RING(ring, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(vcoord_regid) | - 0xfcfcfc00); /* XXX */ + A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) | + A6XX_HLSQ_CONTROL_2_REG_SIZE(ij_size_regid)); + OUT_RING(ring, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(ij_pix_regid) | + A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(ij_cent_regid) | + 0xfc00fc00); /* XXX */ OUT_RING(ring, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | - A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | - 0x0000fcfc); /* XXX */ + A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | + A6XX_HLSQ_CONTROL_4_REG_BARY_IJ_PIXEL_PERSAMP(ij_samp_regid) | + 0x0000fc00); /* XXX */ OUT_RING(ring, 0xfc); /* XXX */ OUT_PKT4(ring, REG_A6XX_HLSQ_UNKNOWN_B980, 1); @@ -547,7 +564,12 @@ setup_stateobj(struct fd_ringbuffer *ring, #endif OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1); - OUT_RING(ring, COND(enable_varyings, A6XX_GRAS_CNTL_VARYING) | + OUT_RING(ring, + CONDREG(ij_pix_regid, A6XX_GRAS_CNTL_VARYING) | + CONDREG(ij_cent_regid, A6XX_GRAS_CNTL_CENTROID) | + CONDREG(ij_samp_regid, A6XX_GRAS_CNTL_PERSAMP_VARYING) | + COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_GRAS_CNTL_SIZE) | + COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_GRAS_CNTL_SIZE_PERSAMP) | COND(s[FS].v->frag_coord, A6XX_GRAS_CNTL_SIZE | A6XX_GRAS_CNTL_XCOORD | @@ -557,8 +579,13 @@ setup_stateobj(struct fd_ringbuffer *ring, COND(s[FS].v->frag_face, A6XX_GRAS_CNTL_SIZE)); OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2); - OUT_RING(ring, COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_VARYING | - A6XX_RB_RENDER_CONTROL0_UNK10) | + OUT_RING(ring, + CONDREG(ij_pix_regid, A6XX_RB_RENDER_CONTROL0_VARYING) | + CONDREG(ij_cent_regid, A6XX_RB_RENDER_CONTROL0_CENTROID) | + CONDREG(ij_samp_regid, A6XX_RB_RENDER_CONTROL0_PERSAMP_VARYING) | + COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) | + COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE) | + COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) | COND(s[FS].v->frag_coord, A6XX_RB_RENDER_CONTROL0_SIZE | A6XX_RB_RENDER_CONTROL0_XCOORD | @@ -568,10 +595,21 @@ setup_stateobj(struct fd_ringbuffer *ring, COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL0_SIZE)); OUT_RING(ring, - CONDREG(samp_mask_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) | + CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) | + COND(sample_shading, A6XX_RB_RENDER_CONTROL1_UNK4 | A6XX_RB_RENDER_CONTROL1_UNK5) | CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) | + CONDREG(ij_size_regid, A6XX_RB_RENDER_CONTROL1_SIZE) | COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS)); + OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_CNTL, 1); + OUT_RING(ring, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE)); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8101, 1); + OUT_RING(ring, COND(sample_shading, 0x6)); // XXX + + OUT_PKT4(ring, REG_A6XX_GRAS_SAMPLE_CNTL, 1); + OUT_RING(ring, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE)); + OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); for (i = 0; i < 8; i++) { // TODO we could have a mix of half and full precision outputs, @@ -743,8 +781,8 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); - setup_stateobj(state->binning_stateobj, state, true); - setup_stateobj(state->stateobj, state, false); + setup_stateobj(state->binning_stateobj, state, key, true); + setup_stateobj(state->stateobj, state, key, false); return &state->base; } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 58640a8..d1edf82 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -307,6 +307,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; + case PIPE_CAP_SAMPLE_SHADING: + if (is_a6xx(screen)) return 1; + return 0; + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: return 0; -- 2.7.4