r600g: add multiple stream support for geom shaders
authorGlenn Kennard <glenn.kennard@gmail.com>
Thu, 9 Jul 2015 06:37:28 +0000 (16:37 +1000)
committerDave Airlie <airlied@redhat.com>
Wed, 2 Sep 2015 05:55:47 +0000 (15:55 +1000)
This patch is taken from work by Glenn and myself,
and I've spent some time making it all work here.

This adds support for the multiple streams part of
ARB_gpu_shader5 to r600g.

It doesn't enable ARB_gpu_shader5 yet.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state.c

index 9ef9274..0d4b598 100644 (file)
@@ -3012,8 +3012,12 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
        struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
-       unsigned gsvs_itemsize =
-                       (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+       unsigned gsvs_itemsizes[4] = {
+                       (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
+       };
 
        r600_init_command_buffer(cb, 64);
 
@@ -3032,21 +3036,24 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
                                S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
        }
        r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
-       r600_store_value(cb, cp_shader->ring_item_size >> 2);
-       r600_store_value(cb, 0);
-       r600_store_value(cb, 0);
-       r600_store_value(cb, 0);
+       r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
 
        r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
-                              (rshader->ring_item_size) >> 2);
+                              (rshader->ring_item_sizes[0]) >> 2);
 
        r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
-                              gsvs_itemsize);
+                              gsvs_itemsizes[0] +
+                              gsvs_itemsizes[1] +
+                              gsvs_itemsizes[2] +
+                              gsvs_itemsizes[3]);
 
        r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
-       r600_store_value(cb, gsvs_itemsize);
-       r600_store_value(cb, gsvs_itemsize);
-       r600_store_value(cb, gsvs_itemsize);
+       r600_store_value(cb, gsvs_itemsizes[0]);
+       r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
+       r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);
 
        /* FIXME calculate these values somehow ??? */
        r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
index f6efaa3..fd9c16c 100644 (file)
@@ -358,7 +358,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
                return 16384;
        case PIPE_CAP_MAX_VERTEX_STREAMS:
-               return 1;
+               return family >= CHIP_CEDAR ? 4 : 1;
 
        case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
                return 2047;
index 153f9a2..25a251e 100644 (file)
@@ -311,7 +311,8 @@ struct r600_shader_ctx {
        int                                     gs_out_ring_offset;
        int                                     gs_next_vertex;
        struct r600_shader      *gs_for_vs;
-       int                                     gs_export_gpr_treg;
+       int                                     gs_export_gpr_tregs[4];
+       const struct pipe_stream_output_info    *gs_stream_output_info;
        unsigned                                enabled_stream_buffers_mask;
 };
 
@@ -320,7 +321,7 @@ struct r600_shader_tgsi_instruction {
        int (*process)(struct r600_shader_ctx *ctx);
 };
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -1337,9 +1338,11 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
        return 0;
 }
 
-static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
+                                                 int stream, unsigned *stream_item_size)
 {
        unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+       unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
        int i, j, r;
 
        /* Sanity checking. */
@@ -1359,8 +1362,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 
        /* Initialize locations where the outputs are stored. */
        for (i = 0; i < so->num_outputs; i++) {
-               so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
 
+               so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+               start_comp[i] = so->output[i].start_component;
                /* Lower outputs with dst_offset < start_component.
                 *
                 * We can only output 4D vectors with a write mask, e.g. we can
@@ -1386,7 +1390,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                if (r)
                                        return r;
                        }
-                       so->output[i].start_component = 0;
+                       start_comp[i] = 0;
                        so_gpr[i] = tmp;
                }
        }
@@ -1395,18 +1399,21 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
        for (i = 0; i < so->num_outputs; i++) {
                struct r600_bytecode_output output;
 
+               if (stream != -1 && stream != so->output[i].output_buffer)
+                       continue;
+
                memset(&output, 0, sizeof(struct r600_bytecode_output));
                output.gpr = so_gpr[i];
-               output.elem_size = so->output[i].num_components;
-               output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+               output.elem_size = so->output[i].num_components - 1;
+               if (output.elem_size == 2)
+                       output.elem_size = 3; // 3 not supported, write 4 with junk at end
+               output.array_base = so->output[i].dst_offset - start_comp[i];
                output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
                output.burst_count = 1;
                /* array_size is an upper limit for the burst_count
                 * with MEM_STREAM instructions */
                output.array_size = 0xFFF;
-               output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
-
-               ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+               output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
 
                if (ctx->bc->chip_class >= EVERGREEN) {
                        switch (so->output[i].output_buffer) {
@@ -1423,6 +1430,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                output.op = CF_OP_MEM_STREAM0_BUF3;
                                break;
                        }
+                       output.op += so->output[i].stream * 4;
+                       assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+                       ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
                } else {
                        switch (so->output[i].output_buffer) {
                        case 0:
@@ -1438,6 +1448,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                output.op = CF_OP_MEM_STREAM3;
                                        break;
                        }
+                       ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
                }
                r = r600_bytecode_add_output(ctx->bc, &output);
                if (r)
@@ -1490,7 +1501,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        struct r600_bytecode_output output;
        struct r600_bytecode_cf *cf_jump, *cf_pop,
                *last_exp_pos = NULL, *last_exp_param = NULL;
-       int i, next_clip_pos = 61, next_param = 0;
+       int i, j, next_clip_pos = 61, next_param = 0;
+       int ring;
 
        cshader = calloc(1, sizeof(struct r600_pipe_shader));
        if (!cshader)
@@ -1510,6 +1522,9 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
        ctx.bc->isa = rctx->isa;
 
+       cf_jump = NULL;
+       memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
+
        /* R0.x = R0.x & 0x3fffffff */
        memset(&alu, 0, sizeof(alu));
        alu.op = ALU_OP2_AND_INT;
@@ -1528,22 +1543,10 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        alu.last = 1;
        r600_bytecode_add_alu(ctx.bc, &alu);
 
-       /* PRED_SETE_INT __, R0.y, 0 */
-       memset(&alu, 0, sizeof(alu));
-       alu.op = ALU_OP2_PRED_SETE_INT;
-       alu.src[0].chan = 1;
-       alu.src[1].sel = V_SQ_ALU_SRC_0;
-       alu.execute_mask = 1;
-       alu.update_pred = 1;
-       alu.last = 1;
-       r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
-
-       r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
-       cf_jump = ctx.bc->cf_last;
-
        /* fetch vertex data from GSVS ring */
        for (i = 0; i < ocnt; ++i) {
                struct r600_shader_io *out = &ctx.shader->output[i];
+
                out->gpr = i + 1;
                out->ring_offset = i * 16;
 
@@ -1553,6 +1556,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
                vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
                vtx.offset = out->ring_offset;
                vtx.dst_gpr = out->gpr;
+               vtx.src_gpr = 0;
                vtx.dst_sel_x = 0;
                vtx.dst_sel_y = 1;
                vtx.dst_sel_z = 2;
@@ -1565,18 +1569,68 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
                r600_bytecode_add_vtx(ctx.bc, &vtx);
        }
+       ctx.temp_reg = i + 1;
+       for (ring = 3; ring >= 0; --ring) {
+               bool enabled = false;
+               for (i = 0; i < so->num_outputs; i++) {
+                       if (so->output[i].stream == ring) {
+                               enabled = true;
+                               break;
+                       }
+               }
+               if (ring != 0 && !enabled) {
+                       cshader->shader.ring_item_sizes[ring] = 0;
+                       continue;
+               }
+
+               if (cf_jump) {
+                       // Patch up jump label
+                       r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+                       cf_pop = ctx.bc->cf_last;
+
+                       cf_jump->cf_addr = cf_pop->id + 2;
+                       cf_jump->pop_count = 1;
+                       cf_pop->cf_addr = cf_pop->id + 2;
+                       cf_pop->pop_count = 1;
+               }
+
+               /* PRED_SETE_INT __, R0.y, ring */
+               memset(&alu, 0, sizeof(alu));
+               alu.op = ALU_OP2_PRED_SETE_INT;
+               alu.src[0].chan = 1;
+               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[1].value = ring;
+               alu.execute_mask = 1;
+               alu.update_pred = 1;
+               alu.last = 1;
+               r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
 
-       /* XXX handle clipvertex, streamout? */
-       emit_streamout(&ctx, so);
+               r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+               cf_jump = ctx.bc->cf_last;
+
+               if (enabled)
+                       emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
+               cshader->shader.ring_item_sizes[ring] = ocnt * 16;
+       }
 
        /* export vertex data */
        /* XXX factor out common code with r600_shader_from_tgsi ? */
        for (i = 0; i < ocnt; ++i) {
                struct r600_shader_io *out = &ctx.shader->output[i];
-
+               bool instream0 = true;
                if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
                        continue;
 
+               for (j = 0; j < so->num_outputs; j++) {
+                       if (so->output[j].register_index == i) {
+                               if (so->output[j].stream == 0)
+                                       break;
+                               if (so->output[j].stream > 0)
+                                       instream0 = false;
+                       }
+               }
+               if (!instream0)
+                       continue;
                memset(&output, 0, sizeof(output));
                output.gpr = out->gpr;
                output.elem_size = 3;
@@ -1722,19 +1776,19 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        }
 
        gs->gs_copy_shader = cshader;
+       cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 
        ctx.bc->nstack = 1;
 
-       cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
-       cshader->shader.ring_item_size = ocnt * 16;
-
        return r600_bytecode_build(ctx.bc);
 }
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
 {
        struct r600_bytecode_output output;
        int i, k, ring_offset;
+       int effective_stream = stream == -1 ? 0 : stream;
+       int idx = 0;
 
        for (i = 0; i < ctx->shader->noutput; i++) {
                if (ctx->gs_for_vs) {
@@ -1751,15 +1805,18 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
 
                        if (ring_offset == -1)
                                continue;
-               } else
-                       ring_offset = i * 16;
+               } else {
+                       ring_offset = idx * 16;
+                       idx++;
+               }
 
+               if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
+                       continue;
                /* next_ring_offset after parsing input decls contains total size of
                 * single vertex data, gs_next_vertex - current vertex index */
                if (!ind)
                        ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
 
-               /* get a temp and add the ring offset to the next vertex base in the shader */
                memset(&output, 0, sizeof(struct r600_bytecode_output));
                output.gpr = ctx->shader->output[i].gpr;
                output.elem_size = 3;
@@ -1770,28 +1827,39 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
                        output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
                else
                        output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-               output.op = CF_OP_MEM_RING;
 
+               switch (stream) {
+               default:
+               case 0:
+                       output.op = CF_OP_MEM_RING; break;
+               case 1:
+                       output.op = CF_OP_MEM_RING1; break;
+               case 2:
+                       output.op = CF_OP_MEM_RING2; break;
+               case 3:
+                       output.op = CF_OP_MEM_RING3; break;
+               }
 
                if (ind) {
                        output.array_base = ring_offset >> 2; /* in dwords */
                        output.array_size = 0xfff;
-                       output.index_gpr = ctx->gs_export_gpr_treg;
+                       output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
                } else
                        output.array_base = ring_offset >> 2; /* in dwords */
                r600_bytecode_add_output(ctx->bc, &output);
        }
 
        if (ind) {
+               /* get a temp and add the ring offset to the next vertex base in the shader */
                struct r600_bytecode_alu alu;
                int r;
 
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                alu.op = ALU_OP2_ADD_INT;
-               alu.src[0].sel = ctx->gs_export_gpr_treg;
+               alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
                alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
                alu.src[1].value = ctx->gs_out_ring_offset >> 4;
-               alu.dst.sel = ctx->gs_export_gpr_treg;
+               alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
                alu.dst.write = 1;
                alu.last = 1;
                r = r600_bytecode_add_alu(ctx->bc, &alu);
@@ -1856,6 +1924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.next_ring_offset = 0;
        ctx.gs_out_ring_offset = 0;
        ctx.gs_next_vertex = 0;
+       ctx.gs_stream_output_info = &so;
 
        shader->uses_index_registers = false;
        ctx.face_gpr = -1;
@@ -1942,8 +2011,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
 
        if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-               ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
-               ctx.temp_reg = ctx.bc->ar_reg + 4;
+               ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
+               ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
+               ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
+               ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
+               ctx.temp_reg = ctx.bc->ar_reg + 7;
        } else {
                ctx.temp_reg = ctx.bc->ar_reg + 3;
        }
@@ -2006,7 +2078,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                }
        }
        
-       shader->ring_item_size = ctx.next_ring_offset;
+       shader->ring_item_sizes[0] = ctx.next_ring_offset;
+       shader->ring_item_sizes[1] = 0;
+       shader->ring_item_sizes[2] = 0;
+       shader->ring_item_sizes[3] = 0;
 
        /* Process two side if needed */
        if (shader->two_side && ctx.colors_used) {
@@ -2129,17 +2204,18 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
                        struct r600_bytecode_alu alu;
                        int r;
-
-                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-                       alu.op = ALU_OP1_MOV;
-                       alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
-                       alu.src[0].value = 0;
-                       alu.dst.sel = ctx.gs_export_gpr_treg;
-                       alu.dst.write = 1;
-                       alu.last = 1;
-                       r = r600_bytecode_add_alu(ctx.bc, &alu);
-                       if (r)
-                               return r;
+                       for (j = 0; j < 4; j++) {
+                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                               alu.op = ALU_OP1_MOV;
+                               alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+                               alu.src[0].value = 0;
+                               alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+                               alu.dst.write = 1;
+                               alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx.bc, &alu);
+                               if (r)
+                                       return r;
+                       }
                }
                if (shader->two_side && ctx.colors_used) {
                        if ((r = process_twoside_color_inputs(&ctx)))
@@ -2240,14 +2316,20 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        /* Add stream outputs. */
        if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
            so.num_outputs && !use_llvm)
-               emit_streamout(&ctx, &so);
+               emit_streamout(&ctx, &so, -1, NULL);
 
        pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
        convert_edgeflag_to_int(&ctx);
 
        if (ring_outputs) {
-               if (key.vs.as_es)
-                       emit_gs_ring_writes(&ctx, FALSE);
+               if (key.vs.as_es) {
+                       ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
+                       ctx.gs_export_gpr_tregs[1] = -1;
+                       ctx.gs_export_gpr_tregs[2] = -1;
+                       ctx.gs_export_gpr_tregs[3] = -1;
+
+                       emit_gs_ring_writes(&ctx, &so, -1, FALSE);
+               }
        } else {
                /* Export output */
                next_clip_base = shader->vs_out_misc_write ? 62 : 61;
@@ -7198,10 +7280,17 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 
 static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
 {
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
+       int r;
+
        if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
-               emit_gs_ring_writes(ctx, TRUE);
+               emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
 
-       return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+       r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+       if (!r)
+               ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
+       return r;
 }
 
 static int tgsi_umad(struct r600_shader_ctx *ctx)
index f5ca9d6..f49e1c1 100644 (file)
@@ -78,8 +78,9 @@ struct r600_shader {
        /* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
        boolean                 uses_index_registers;
 
-       /* size in bytes of a data item in the ring (single vertex data) */
-       unsigned                ring_item_size;
+       /* Size in bytes of a data item in the ring(s) (single vertex data).
+          Stages with only one ring items 123 will be set to 0. */
+       unsigned                ring_item_sizes[4];
 
        unsigned                indirect_files;
        unsigned                max_arrays;
index aff8f03..f2d24a3 100644 (file)
@@ -2650,7 +2650,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        struct r600_shader *rshader = &shader->shader;
        struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
        unsigned gsvs_itemsize =
-                       (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+                       (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2;
 
        r600_init_command_buffer(cb, 64);
 
@@ -2665,10 +2665,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                               r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
 
        r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
-                              cp_shader->ring_item_size >> 2);
+                              cp_shader->ring_item_sizes[0] >> 2);
 
        r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
-                              (rshader->ring_item_size) >> 2);
+                              (rshader->ring_item_sizes[0]) >> 2);
 
        r600_store_context_reg(cb, R_0288AC_SQ_GSVS_RING_ITEMSIZE,
                               gsvs_itemsize);