From 6d434252e239bc872549e59c64eb3d0e5dab0655 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 31 Jan 2014 08:06:25 +0000 Subject: [PATCH] r600g: add support for multiple viewports. tested on rv635 and barts. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 41 +++++++++++++++++++--------- src/gallium/drivers/r600/r600_blit.c | 4 +-- src/gallium/drivers/r600/r600_hw_context.c | 9 ++++-- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 8 ++++-- src/gallium/drivers/r600/r600_shader.c | 17 ++++++++++++ src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600_state.c | 37 +++++++++++++++++-------- src/gallium/drivers/r600/r600_state_common.c | 19 ++++++++----- 9 files changed, 98 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c86e812..41607b1 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1320,20 +1320,25 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx, const struct pipe_scissor_state *state) { struct r600_context *rctx = (struct r600_context *)ctx; + int i; - rctx->scissor.scissor = *state; - rctx->scissor.atom.dirty = true; + for (i = start_slot; i < start_slot + num_scissors; i++) { + rctx->scissor[i].scissor = state[i - start_slot]; + rctx->scissor[i].atom.dirty = true; + } } static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - struct pipe_scissor_state *state = &rctx->scissor.scissor; + struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom; + struct pipe_scissor_state *state = &rstate->scissor; + unsigned offset = rstate->idx * 4 * 2; uint32_t tl, br; evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br); - r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); + r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2); radeon_emit(cs, tl); radeon_emit(cs, br); } @@ -2817,6 +2822,7 @@ void cayman_init_common_regs(struct r600_command_buffer *cb, static void cayman_init_atom_start_cs(struct r600_context *rctx) { struct r600_command_buffer *cb = &rctx->start_cs_cmd; + int tmp; r600_init_command_buffer(cb, 256); @@ -2904,9 +2910,11 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); - r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); - r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ - r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2 * 16); + for (tmp = 0; tmp < 16; tmp++) { + r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ + r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + } r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F); @@ -3353,9 +3361,11 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); - r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); - r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ - r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2 * 16); + for (tmp = 0; tmp < 16; tmp++) { + r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ + r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + } r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0); r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x0000043F); @@ -3727,6 +3737,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->clip_dist_write & 0xF0) != 0) | S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) | S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) | + S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport) | S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer); } @@ -4037,7 +4048,7 @@ static boolean evergreen_dma_blit(struct pipe_context *ctx, void evergreen_init_state_functions(struct r600_context *rctx) { unsigned id = 4; - + int i; /* !!! * To avoid GPU lockup registers must be emited in a specific order * (no kidding ...). The order below is important and have been @@ -4087,9 +4098,13 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0); r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6); r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0); - r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 4); + for (i = 0; i < 16; i++) { + r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8); + r600_init_atom(rctx, &rctx->scissor[i].atom, id++, evergreen_emit_scissor_state, 4); + rctx->viewport[i].idx = i; + rctx->scissor[i].idx = i; + } r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); - r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); rctx->atoms[id++] = &rctx->b.streamout.begin_atom; r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 6bb7cfe..770eb38 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -65,8 +65,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso); if (op & R600_SAVE_FRAGMENT_STATE) { - util_blitter_save_viewport(rctx->blitter, &rctx->viewport.state); - util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor); + util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state); + util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor); util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso); util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index fc81e95..dc3c221 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -276,7 +276,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) void r600_begin_new_cs(struct r600_context *ctx) { unsigned shader; - + int i; ctx->b.flags = 0; ctx->b.gtt = 0; ctx->b.vram = 0; @@ -297,7 +297,10 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->poly_offset_state.atom.dirty = true; ctx->vgt_state.atom.dirty = true; ctx->sample_mask.atom.dirty = true; - ctx->scissor.atom.dirty = true; + for (i = 0; i < 16; i++) { + ctx->scissor[i].atom.dirty = true; + ctx->viewport[i].atom.dirty = true; + } ctx->config_state.atom.dirty = true; ctx->stencil_ref.atom.dirty = true; ctx->vertex_fetch_shader.atom.dirty = true; @@ -308,7 +311,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->gs_rings.atom.dirty = true; } ctx->vertex_shader.atom.dirty = true; - ctx->viewport.atom.dirty = true; + if (ctx->blend_state.cso) ctx->blend_state.atom.dirty = true; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d9b4509..796f0f5 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -434,7 +434,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 8; case PIPE_CAP_MAX_VIEWPORTS: - return 1; + return 16; /* Timer queries, present when the clock frequency is non zero. */ case PIPE_CAP_QUERY_TIME_ELAPSED: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 9f27a17..15052da 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "util/u_double_list.h" #include "util/u_transfer.h" -#define R600_NUM_ATOMS 42 +#define R600_NUM_ATOMS 72 /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 @@ -179,6 +179,7 @@ struct r600_stencil_ref_state { struct r600_viewport_state { struct r600_atom atom; struct pipe_viewport_state state; + int idx; }; struct r600_shader_stages_state { @@ -358,6 +359,7 @@ struct r600_scissor_state struct r600_atom atom; struct pipe_scissor_state scissor; bool enable; /* r6xx only */ + int idx; }; struct r600_fetch_shader { @@ -419,12 +421,12 @@ struct r600_context { struct r600_poly_offset_state poly_offset_state; struct r600_cso_state rasterizer_state; struct r600_sample_mask sample_mask; - struct r600_scissor_state scissor; + struct r600_scissor_state scissor[16]; struct r600_seamless_cube_map seamless_cube_map; struct r600_config_state config_state; struct r600_stencil_ref_state stencil_ref; struct r600_vgt_state vgt_state; - struct r600_viewport_state viewport; + struct r600_viewport_state viewport[16]; /* Shaders and shader resources. */ struct r600_cso_state vertex_fetch_shader; struct r600_shader_state vertex_shader; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 758abd7..baeef57 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -492,6 +492,7 @@ static int r600_spi_sid(struct r600_shader_io * io) if (name == TGSI_SEMANTIC_POSITION || name == TGSI_SEMANTIC_PSIZE || name == TGSI_SEMANTIC_LAYER || + name == TGSI_SEMANTIC_VIEWPORT_INDEX || name == TGSI_SEMANTIC_FACE) index = 0; else { @@ -623,6 +624,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->vs_out_misc_write = 1; ctx->shader->vs_out_point_size = 1; break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + ctx->shader->vs_out_misc_write = 1; + ctx->shader->vs_out_viewport = 1; + break; case TGSI_SEMANTIC_LAYER: ctx->shader->vs_out_misc_write = 1; ctx->shader->vs_out_layer = 1; @@ -1296,6 +1301,18 @@ static int generate_gs_copy_shader(struct r600_context *rctx, ctx.shader->vs_out_misc_write = 1; ctx.shader->vs_out_layer = 1; break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + output.array_base = 61; + if (next_clip_pos == 61) + next_clip_pos = 62; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + ctx.shader->vs_out_misc_write = 1; + ctx.shader->vs_out_viewport = 1; + output.swizzle_x = 7; + output.swizzle_y = 7; + output.swizzle_z = 7; + output.swizzle_w = 0; + break; case TGSI_SEMANTIC_CLIPDIST: /* spi_sid is 0 for clipdistance outputs that were generated * for clipvertex - we don't need to pass them to PS */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 89602c7..519d0aa 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -63,6 +63,7 @@ struct r600_shader { boolean vs_out_misc_write; boolean vs_out_point_size; boolean vs_out_layer; + boolean vs_out_viewport; boolean has_txq_cube_array_z_comp; boolean uses_tex_buffers; boolean gs_prim_id_input; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0dfd426..5538898 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1205,10 +1205,12 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx, static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - struct pipe_scissor_state *state = &rctx->scissor.scissor; + struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom; + struct pipe_scissor_state *state = &rstate->scissor; + unsigned offset = rstate->idx * 4 * 2; - if (rctx->b.chip_class != R600 || rctx->scissor.enable) { - r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); + if (rctx->b.chip_class != R600 || rctx->scissor[0].enable) { + r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2); radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1)); radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy)); @@ -1226,13 +1228,18 @@ static void r600_set_scissor_states(struct pipe_context *ctx, const struct pipe_scissor_state *state) { struct r600_context *rctx = (struct r600_context *)ctx; + int i; - rctx->scissor.scissor = *state; + for (i = start_slot ; i < start_slot + num_scissors; i++) { + rctx->scissor[i].scissor = state[i - start_slot]; + } - if (rctx->b.chip_class == R600 && !rctx->scissor.enable) + if (rctx->b.chip_class == R600 && !rctx->scissor[0].enable) return; - rctx->scissor.atom.dirty = true; + for (i = start_slot ; i < start_slot + num_scissors; i++) { + rctx->scissor[i].atom.dirty = true; + } } static struct r600_resource *r600_buffer_create_helper(struct r600_screen *rscreen, @@ -2775,9 +2782,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, 0x3F800000); /* R_028C14_PA_CL_GB_HORZ_CLIP_ADJ */ r600_store_value(cb, 0x3F800000); /* R_028C18_PA_CL_GB_HORZ_DISC_ADJ */ - r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); - r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ - r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + r600_store_context_reg_seq(cb, R_0282D0_PA_SC_VPORT_ZMIN_0, 2 * 16); + for (tmp = 0; tmp < 16; tmp++) { + r600_store_value(cb, 0); /* R_0282D0_PA_SC_VPORT_ZMIN_0 */ + r600_store_value(cb, 0x3F800000); /* R_0282D4_PA_SC_VPORT_ZMAX_0 */ + } r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, 0x43F); @@ -2996,6 +3005,7 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->clip_dist_write & 0x0F) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->clip_dist_write & 0xF0) != 0) | S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) | + S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport) | S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size); } @@ -3393,6 +3403,7 @@ static boolean r600_dma_blit(struct pipe_context *ctx, void r600_init_state_functions(struct r600_context *rctx) { unsigned id = 4; + int i; /* !!! * To avoid GPU lockup registers must be emited in a specific order @@ -3440,10 +3451,14 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0); r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6); r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0); - r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 4); + for (i = 0;i < 16; i++) { + r600_init_atom(rctx, &rctx->scissor[i].atom, id++, r600_emit_scissor_state, 4); + r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8); + rctx->scissor[i].idx = i; + rctx->viewport[i].idx = i; + } r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3); r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); - r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 8); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); rctx->atoms[id++] = &rctx->b.streamout.begin_atom; r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 22f2b06..da4578a 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -334,9 +334,9 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) /* Workaround for a missing scissor enable on r600. */ if (rctx->b.chip_class == R600 && - rs->scissor_enable != rctx->scissor.enable) { - rctx->scissor.enable = rs->scissor_enable; - rctx->scissor.atom.dirty = true; + rs->scissor_enable != rctx->scissor[0].enable) { + rctx->scissor[0].enable = rs->scissor_enable; + rctx->scissor[0].atom.dirty = true; } /* Re-emit PA_SC_LINE_STIPPLE. */ @@ -657,17 +657,22 @@ static void r600_set_viewport_states(struct pipe_context *ctx, const struct pipe_viewport_state *state) { struct r600_context *rctx = (struct r600_context *)ctx; + int i; - rctx->viewport.state = *state; - rctx->viewport.atom.dirty = true; + for (i = start_slot; i < start_slot + num_viewports; i++) { + rctx->viewport[i].state = state[i - start_slot]; + rctx->viewport[i].atom.dirty = true; + } } void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - struct pipe_viewport_state *state = &rctx->viewport.state; + struct r600_viewport_state *rstate = (struct r600_viewport_state *)atom; + struct pipe_viewport_state *state = &rstate->state; + int offset = rstate->idx * 6 * 4; - r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0, 6); + r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6); radeon_emit(cs, fui(state->scale[0])); /* R_02843C_PA_CL_VPORT_XSCALE_0 */ radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */ radeon_emit(cs, fui(state->scale[1])); /* R_028444_PA_CL_VPORT_YSCALE_0 */ -- 2.7.4