radeonsi: use a single descriptor for the GSVS ring
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
author date: Tue, 29 Nov 2016 16:41:59 +0000 (17:41 +0100)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
commit date: Mon, 12 Dec 2016 08:05:05 +0000 (09:05 +0100)
We can hardcode all of the fields for swizzling in the geometry shader.

The advantage is that we use fewer descriptor slots and we no longer have to
update any of the (ring) descriptors when the geometry shader changes.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 31b7985..9f79c2a 100644 (file)
@@ -327,7 +327,6 @@ struct si_context {
        int                     last_vtx_reuse_depth;
        int                     current_rast_prim; /* primitive type after TES, GS */
        bool                    gs_tri_strip_adj_fix;
-       unsigned                last_gsvs_itemsize;
 
        /* Scratch buffer */
        struct r600_resource    *scratch_buffer;
index 48ccd83..9b49592 100644 (file)
@@ -5817,6 +5817,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm =
                ctx->soa.bld_base.base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
 
        LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
                                            SI_PARAM_RW_BUFFERS);
@@ -5836,18 +5837,74 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
        }
 
        if (ctx->shader->is_gs_copy_shader) {
-               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);
+               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
 
                ctx->gsvs_ring[0] =
                        build_indexed_load_const(ctx, buf_ptr, offset);
-       }
-       if (ctx->type == PIPE_SHADER_GEOMETRY) {
-               int i;
-               for (i = 0; i < 4; i++) {
-                       LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);
+       } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+               struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
+               LLVMValueRef base_ring;
+
+               base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+               /* The conceptual layout of the GSVS ring is
+                *   v0c0 .. vLc0 v0c1 .. vLc1 ..
+                * but the real memory layout is swizzled across
+                * threads:
+                *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+                *   t16v0c0 ..
+                * Override the buffer descriptor accordingly.
+                */
+               LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+               unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size;
+               unsigned num_records;
+
+               num_records = 64;
+               if (ctx->screen->b.chip_class >= VI)
+                       num_records *= max_gsvs_emit_size;
+
+               for (unsigned stream = 0; stream < 4; ++stream) {
+                       LLVMValueRef ring, tmp;
+
+                       if (!ctx->shader->selector->info.num_stream_output_components[stream])
+                               continue;
 
-                       ctx->gsvs_ring[i] =
-                               build_indexed_load_const(ctx, buf_ptr, offset);
+                       /* Limit on the stride field for <= CIK. */
+                       assert(max_gsvs_emit_size < (1 << 14));
+
+                       ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+                       tmp = LLVMBuildAdd(builder, tmp,
+                                          LLVMConstInt(ctx->i64,
+                                                       max_gsvs_emit_size * 64 * stream, 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+                       ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+                       tmp = LLVMBuildOr(builder, tmp,
+                               LLVMConstInt(ctx->i32,
+                                            S_008F04_STRIDE(max_gsvs_emit_size) |
+                                            S_008F04_SWIZZLE_ENABLE(1), 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+                       ring = LLVMBuildInsertElement(builder, ring,
+                                       LLVMConstInt(ctx->i32, num_records, 0),
+                                       LLVMConstInt(ctx->i32, 2, 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring,
+                               LLVMConstInt(ctx->i32,
+                                            S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                                            S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                                            S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                                            S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                                            S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                            S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                            S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
+                                            S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+                                            S_008F0C_ADD_TID_ENABLE(1),
+                                            0),
+                               LLVMConstInt(ctx->i32, 3, 0), "");
+                       ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
+
+                       ctx->gsvs_ring[stream] = ring;
                }
        }
 }
index d8e6024..a17dbc7 100644 (file)
@@ -167,11 +167,7 @@ enum {
        SI_ES_RING_ESGS,
        SI_GS_RING_ESGS,
 
-       SI_GS_RING_GSVS0,
-       SI_GS_RING_GSVS1,
-       SI_GS_RING_GSVS2,
-       SI_GS_RING_GSVS3,
-       SI_VS_RING_GSVS,
+       SI_RING_GSVS,
 
        SI_VS_STREAMOUT_BUF0,
        SI_VS_STREAMOUT_BUF1,
index ea71569..1e9f5f0 100644 (file)
@@ -2039,47 +2039,14 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
                                   false, false, 0, 0, 0);
        }
        if (sctx->gsvs_ring) {
-               si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
+               si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
                                   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
                                   false, false, 0, 0, 0);
-
-               /* Also update SI_GS_RING_GSVSi descriptors. */
-               sctx->last_gsvs_itemsize = 0;
        }
 
        return true;
 }
 
-static void si_update_gsvs_ring_bindings(struct si_context *sctx)
-{
-       unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
-       uint64_t offset;
-
-       if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
-               return;
-
-       sctx->last_gsvs_itemsize = gsvs_itemsize;
-
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, 0);
-
-       offset = gsvs_itemsize * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-
-       offset = (gsvs_itemsize * 2) * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-
-       offset = (gsvs_itemsize * 3) * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-}
-
 /**
  * @returns 1 if \p sel has been updated to use a new scratch buffer
  *          0 if not
@@ -2469,8 +2436,6 @@ bool si_update_shaders(struct si_context *sctx)
 
                if (!si_update_gs_ring_buffers(sctx))
                        return false;
-
-               si_update_gsvs_ring_bindings(sctx);
        } else {
                si_pm4_bind_state(sctx, gs, NULL);
                si_pm4_bind_state(sctx, es, NULL);