From 286b7723f04626a1337c0d33bdbf8e7cb5c9e8ee Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 30 Aug 2023 18:22:31 -0700 Subject: [PATCH] freedreno/a6xx: ARB_sample_locations Note, gl_SamplePosition (rgetpos), and therefore interpolateAtSample(), doesn't work with sample location_enable=true. For Vulkan, "If the current pipeline uses custom sample locations the value of any variable decorated with the SamplePosition built-in decoration is undefined." But ARB_sample_locations doesn't mention this. Possibly the vk text should be backported to the gl extension. (If the app is specifying the sample locations, it shouldn't need gl_SamplePosition.) The upshot of this is that 2 out of the 3 tests in piglit's arb_sample_locations test fail, even though sample locations itself is working. Possibly the test should be updated. Or we could use driconf to hide ARB_gpu_shader5 and ARB_sample_shading from this test. Signed-off-by: Rob Clark Part-of: --- docs/features.txt | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_context.cc | 14 +++++++ src/gallium/drivers/freedreno/a6xx/fd6_context.h | 5 ++- src/gallium/drivers/freedreno/a6xx/fd6_emit.cc | 46 +++++++++++++++++++++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 1 + src/gallium/drivers/freedreno/freedreno_context.h | 7 +++- src/gallium/drivers/freedreno/freedreno_screen.c | 13 +++++++ src/gallium/drivers/freedreno/freedreno_state.c | 20 ++++++++++ 8 files changed, 102 insertions(+), 6 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index ada8a9b..31a8913 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -304,7 +304,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_parallel_shader_compile DONE (freedreno, radeonsi, etnaviv, zink, iris, crocus/gen6+) GL_ARB_post_depth_coverage DONE (freedreno/a6xx, nvc0, radeonsi, llvmpipe, zink, iris/gen9+) GL_ARB_robustness_isolation not started - GL_ARB_sample_locations DONE (nvc0, zink) + GL_ARB_sample_locations DONE 
(freedreno/a6xx, nvc0, zink) GL_ARB_seamless_cubemap_per_texture DONE (etnaviv/SEAMLESS_CUBE_MAP, freedreno, nvc0, r600, radeonsi, softpipe, virgl, zink, asahi, iris, crocus) GL_ARB_shader_ballot DONE (nvc0, radeonsi, zink, iris, crocus/gen8) GL_ARB_shader_clock DONE (nv50, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7+) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index 8931b46..0b217a0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -39,6 +39,7 @@ #include "fd6_emit.h" #include "fd6_gmem.h" #include "fd6_image.h" +#include "fd6_pack.h" #include "fd6_program.h" #include "fd6_query.h" #include "fd6_rasterizer.h" @@ -58,6 +59,9 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt if (fd6_ctx->streamout_disable_stateobj) fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj); + if (fd6_ctx->sample_locations_disable_stateobj) + fd_ringbuffer_del(fd6_ctx->sample_locations_disable_stateobj); + fd_context_destroy(pctx); if (fd6_ctx->vsc_draw_strm) @@ -173,6 +177,7 @@ setup_state_map(struct fd_context *ctx) BIT(FD6_GROUP_PROG_FB_RAST)); fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK, BIT(FD6_GROUP_BLEND)); + fd_context_add_map(ctx, FD_DIRTY_SAMPLE_LOCATIONS, BIT(FD6_GROUP_SAMPLE_LOCATIONS)); fd_context_add_map(ctx, FD_DIRTY_BLEND_COLOR, BIT(FD6_GROUP_BLEND_COLOR)); fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_CONST, BIT(FD6_GROUP_CONST)); @@ -309,6 +314,15 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd6_blitter_init(pctx); + struct fd_ringbuffer *ring = + fd_ringbuffer_new_object(fd6_ctx->base.pipe, 6 * 4); + + OUT_REG(ring, A6XX_GRAS_SAMPLE_CONFIG()); + OUT_REG(ring, A6XX_RB_SAMPLE_CONFIG()); + OUT_REG(ring, A6XX_SP_TP_SAMPLE_CONFIG()); + + fd6_ctx->sample_locations_disable_stateobj = ring; + return fd_context_init_tc(pctx, flags); } diff --git 
a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 4139904..c6412c0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -103,9 +103,12 @@ struct fd6_context { struct fd_bo *control_mem; uint32_t seqno; - /* pre-backed stateobj for stream-out disable: */ + /* pre-baked stateobj for stream-out disable: */ struct fd_ringbuffer *streamout_disable_stateobj; + /* pre-baked stateobj for sample-locations disable: */ + struct fd_ringbuffer *sample_locations_disable_stateobj; + /* storage for ctx->last.key: */ struct ir3_shader_key last_key; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 0b3ffd0..159e15c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -352,6 +352,45 @@ build_blend_color(struct fd6_emit *emit) assert_dt return ring; } +static struct fd_ringbuffer * +build_sample_locations(struct fd6_emit *emit) + assert_dt +{ + struct fd_context *ctx = emit->ctx; + + if (!ctx->sample_locations_enabled) { + struct fd6_context *fd6_ctx = fd6_context(ctx); + return fd_ringbuffer_ref(fd6_ctx->sample_locations_disable_stateobj); + } + + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( + ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING); + + uint32_t sample_locations = 0; + for (int i = 0; i < 4; i++) { + float x = (ctx->sample_locations[i] & 0xf) / 16.0f; + float y = (16 - (ctx->sample_locations[i] >> 4)) / 16.0f; + + x = CLAMP(x, 0.0f, 0.9375f); + y = CLAMP(y, 0.0f, 0.9375f); + + sample_locations |= + (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(x) | + A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(y)) << i*8; + } + + OUT_REG(ring, A6XX_GRAS_SAMPLE_CONFIG(.location_enable = true), + A6XX_GRAS_SAMPLE_LOCATION_0(.dword = sample_locations)); + + OUT_REG(ring, A6XX_RB_SAMPLE_CONFIG(.location_enable = true), + 
A6XX_RB_SAMPLE_LOCATION_0(.dword = sample_locations)); + + OUT_REG(ring, A6XX_SP_TP_SAMPLE_CONFIG(.location_enable = true), + A6XX_SP_TP_SAMPLE_LOCATION_0(.dword = sample_locations)); + + return ring; +} + static void fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt { @@ -603,6 +642,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) state = build_blend_color(emit); fd6_state_take_group(&emit->state, state, FD6_GROUP_BLEND_COLOR); break; + case FD6_GROUP_SAMPLE_LOCATIONS: + state = build_sample_locations(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_SAMPLE_LOCATIONS); + break; case FD6_GROUP_VS_BINDLESS: state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS); @@ -846,14 +889,11 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0); WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0); WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); - WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0); /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_MODE_CNTL * but this seems to kill texture gather offsets. 
*/ WRITE(REG_A6XX_SP_TP_MODE_CNTL, 0xa0 | A6XX_SP_TP_MODE_CNTL_ISAMMODE(ISAMMODE_GL)); - WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0); - WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0); WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0); OUT_REG(ring, HLSQ_CONTROL_5_REG( diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 5fd9e38..c8ca723 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -64,6 +64,7 @@ enum fd6_state_id { FD6_GROUP_BLEND, FD6_GROUP_SCISSOR, FD6_GROUP_BLEND_COLOR, + FD6_GROUP_SAMPLE_LOCATIONS, FD6_GROUP_SO, FD6_GROUP_VS_BINDLESS, FD6_GROUP_HS_BINDLESS, diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 8cff6b8..e753b35 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -166,9 +166,10 @@ enum fd_dirty_3d_state { FD_DIRTY_IMAGE = BIT(18), FD_DIRTY_SSBO = BIT(19), FD_DIRTY_QUERY = BIT(20), + FD_DIRTY_SAMPLE_LOCATIONS = BIT(21), /* only used by a2xx.. possibly can be removed.. 
*/ - FD_DIRTY_TEXSTATE = BIT(21), + FD_DIRTY_TEXSTATE = BIT(22), /* fine grained state changes, for cases where state is not orthogonal * from hw perspective: @@ -487,6 +488,10 @@ struct fd_context { unsigned sample_mask dt; unsigned min_samples dt; + /* 1x1 grid, max 4x MSAA: */ + uint8_t sample_locations[4] dt; + bool sample_locations_enabled dt; + /* local context fb state, for when ctx->batch is null: */ struct pipe_framebuffer_state framebuffer dt; uint32_t all_mrt_channel_mask dt; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index a4d70db..54dfcbf 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -125,6 +125,14 @@ fd_screen_get_device_vendor(struct pipe_screen *pscreen) return "Qualcomm"; } +static void +fd_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count, + unsigned *out_width, unsigned *out_height) +{ + *out_width = 1; + *out_height = 1; +} + static uint64_t fd_screen_get_timestamp(struct pipe_screen *pscreen) { @@ -314,6 +322,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: return is_a6xx(screen); + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: + return is_a6xx(screen) && screen->info->a6xx.has_sample_locations; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); @@ -1250,6 +1261,8 @@ fd_screen_create(int fd, pscreen->get_vendor = fd_screen_get_vendor; pscreen->get_device_vendor = fd_screen_get_device_vendor; + pscreen->get_sample_pixel_grid = fd_get_sample_pixel_grid; + pscreen->get_timestamp = fd_screen_get_timestamp; pscreen->fence_reference = _fd_fence_ref; diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index daa7934..5fea07e 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ 
b/src/gallium/drivers/freedreno/freedreno_state.c @@ -99,6 +99,25 @@ fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) in_dt } static void +fd_set_sample_locations(struct pipe_context *pctx, size_t size, + const uint8_t *locations) + in_dt +{ + struct fd_context *ctx = fd_context(pctx); + + if (!locations) { + ctx->sample_locations_enabled = false; + return; + } + + size = MIN2(size, sizeof(ctx->sample_locations)); + memcpy(ctx->sample_locations, locations, size); + ctx->sample_locations_enabled = true; + + fd_context_dirty(ctx, FD_DIRTY_SAMPLE_LOCATIONS); +} + +static void fd_set_min_samples(struct pipe_context *pctx, unsigned min_samples) in_dt { struct fd_context *ctx = fd_context(pctx); @@ -805,6 +824,7 @@ fd_state_init(struct pipe_context *pctx) pctx->set_shader_buffers = fd_set_shader_buffers; pctx->set_shader_images = fd_set_shader_images; pctx->set_framebuffer_state = fd_set_framebuffer_state; + pctx->set_sample_locations = fd_set_sample_locations; pctx->set_polygon_stipple = fd_set_polygon_stipple; pctx->set_scissor_states = fd_set_scissor_states; pctx->set_viewport_states = fd_set_viewport_states; -- 2.7.4