From de0b76cab22caa9fc7260f80acb8f151ccced6c5 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Sun, 29 Aug 2010 21:01:51 -0400 Subject: [PATCH] r600g: precompute some of the hw state Idea is to build hw state at pipe state creation and reuse them while keeping a non PM4 packet interface btw winsys & pipe driver. This commit also force rebuild of pm4 packet on each call to radeon_state_pm4 which in turn slow down everythings, this will be addressed. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_blit.c | 4 +- src/gallium/drivers/r600/r600_context.h | 2 +- src/gallium/drivers/r600/r600_resource.h | 2 +- src/gallium/drivers/r600/r600_screen.h | 2 +- src/gallium/drivers/r600/r600_state.c | 226 ++++++++++------------------- src/gallium/drivers/r600/r600_texture.c | 43 ++++-- src/gallium/drivers/r600/radeon.h | 47 ++++-- src/gallium/winsys/r600/drm/r600_state.c | 9 +- src/gallium/winsys/r600/drm/r600_states.h | 28 +++- src/gallium/winsys/r600/drm/radeon_priv.h | 20 +-- src/gallium/winsys/r600/drm/radeon_state.c | 62 ++++++-- 11 files changed, 229 insertions(+), 216 deletions(-) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index be1fcf9..dbcd6cd 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -670,7 +670,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te if (r) { return r; } - r = r600_texture_cb0(ctx, rtexture, level); + r = r600_texture_cb(ctx, rtexture, 0, level); if (r) { return r; } @@ -772,7 +772,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te if (r) { goto out; } - r = radeon_draw_set(draw, rtexture->cb0[level]); + r = radeon_draw_set(draw, rtexture->cb[0][level]); if (r) { goto out; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index d96d5b5..e9495f0 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -121,7 +121,7 @@ struct r600_context_hw_states { struct radeon_state *config; struct radeon_state *cb_cntl; struct radeon_state *db; - struct radeon_state *ucp[6]; + struct radeon_state *ucp; unsigned ps_nresource; unsigned ps_nsampler; struct radeon_state *ps_resource[160]; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index b880f93..8078a83 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -57,7 +57,7 @@ struct r600_resource_texture { unsigned dirty; struct radeon_bo *uncompressed; struct radeon_state *scissor[PIPE_MAX_TEXTURE_LEVELS]; - struct radeon_state *cb0[PIPE_MAX_TEXTURE_LEVELS]; + struct radeon_state *cb[8][PIPE_MAX_TEXTURE_LEVELS]; struct radeon_state *db[PIPE_MAX_TEXTURE_LEVELS]; struct radeon_state *viewport[PIPE_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 438976f..b9938f1 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -84,7 +84,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); -int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); +int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level); int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b5db848..6049e13 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -34,6 +34,16 @@ #include "r600d.h" #include "r600_state_inlines.h" +static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state); +static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state); +static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state); +static struct radeon_state *r600_sampler(struct r600_context *rctx, + const struct pipe_sampler_state *state, + unsigned id); +static struct radeon_state *r600_resource(struct pipe_context *ctx, + const struct pipe_sampler_view *view, + unsigned id); + static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { @@ -86,6 +96,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rstate->state.sampler_view.texture = texture; rstate->state.sampler_view.reference.count = 1; rstate->state.sampler_view.context = ctx; + rstate->rstate = r600_resource(ctx, &rstate->state.sampler_view, 0); return &rstate->state.sampler_view; } @@ -229,6 +240,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)states[i]; rctx->ps_sampler[i] = r600_context_state_incref(rstate); + if (rstate) { + radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_PS); + } } rctx->ps_nsampler = count; } @@ -246,6 +260,9 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)states[i]; rctx->vs_sampler[i] = r600_context_state_incref(rstate); + if (rstate) { + radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_VS); + } } rctx->vs_nsampler = count; } @@ -337,6 +354,9 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)views[i]; rctx->ps_sampler_view[i] = r600_context_state_incref(rstate); + if (rstate) { + radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_PS); + } } rctx->ps_nsampler_view = count; } @@ -355,6 +375,9 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)views[i]; rctx->vs_sampler_view[i] = r600_context_state_incref(rstate); + if (rstate) { + radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_VS); + } } rctx->vs_nsampler_view = count; } @@ -363,10 +386,19 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { struct r600_context *rctx = r600_context(ctx); + struct r600_resource_texture *rtexture; struct r600_context_state *rstate; rstate = r600_context_state(rctx, pipe_framebuffer_type, state); r600_bind_state(ctx, rstate); + for (int i = 0; i < state->nr_cbufs; i++) { + rtexture = (struct r600_resource_texture*)state->cbufs[i]->texture; + r600_texture_cb(ctx, rtexture, i, state->cbufs[i]->level); + } + if (state->zsbuf) { + rtexture = (struct r600_resource_texture*)state->zsbuf->texture; + r600_texture_db(ctx, rtexture, state->zsbuf->level); + } } static void r600_set_polygon_stipple(struct pipe_context *ctx, @@ -565,6 +597,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_viewport_type: rstate->state.viewport = (*states).viewport; + rstate->rstate = r600_viewport(rctx, &rstate->state.viewport); break; case pipe_depth_type: rstate->state.depth = (*states).depth; @@ -580,6 +613,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_clip_type: rstate->state.clip = (*states).clip; + rstate->rstate = r600_ucp(rctx, &rstate->state.clip); break; case pipe_stencil_type: rstate->state.stencil = (*states).stencil; @@ -592,6 +626,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_blend_type: rstate->state.blend = (*states).blend; + rstate->rstate = r600_blend(rctx, &rstate->state.blend); break; case pipe_stencil_ref_type: rstate->state.stencil_ref = (*states).stencil_ref; @@ -606,6 +641,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_sampler_type: rstate->state.sampler = (*states).sampler; + rstate->rstate = r600_sampler(rctx, &rstate->state.sampler, 0); break; default: R600_ERR("invalid type %d\n", rstate->type); @@ -615,11 +651,10 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static struct radeon_state *r600_blend(struct r600_context *rctx) +static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state) { struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; rstate = radeon_state(rscreen->rw, R600_STATE_BLEND, 0); @@ -675,129 +710,28 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) return rstate; } -static struct radeon_state *r600_ucp(struct r600_context *rctx, int clip) +static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state) { struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; - const struct pipe_clip_state *state = &rctx->clip->state.clip; - rstate = radeon_state(rscreen->rw, R600_STATE_CLIP, clip); + rstate = radeon_state(rscreen->rw, R600_STATE_UCP, 0); if (rstate == NULL) return NULL; - rstate->states[R600_CLIP__PA_CL_UCP_X_0] = fui(state->ucp[clip][0]); - rstate->states[R600_CLIP__PA_CL_UCP_Y_0] = fui(state->ucp[clip][1]); - rstate->states[R600_CLIP__PA_CL_UCP_Z_0] = fui(state->ucp[clip][2]); - rstate->states[R600_CLIP__PA_CL_UCP_W_0] = fui(state->ucp[clip][3]); - - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; + for (int i = 0; i < state->nr; i++) { + rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); + rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); + rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); + rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); } - return rstate; -} - -static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct radeon_state *rstate; - const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level = state->cbufs[cb]->level; - unsigned pitch, slice; - unsigned color_info; - unsigned format, swap, ntype; - const struct util_format_description *desc; - - rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0); - if (rstate == NULL) - return NULL; - rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; - rbuffer = &rtex->resource; - rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; - rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; - rstate->nbo = 3; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; - - ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); - - color_info = S_0280A0_FORMAT(format) | - S_0280A0_COMP_SWAP(swap) | - S_0280A0_BLEND_CLAMP(1) | - S_0280A0_SOURCE_FORMAT(1) | - S_0280A0_NUMBER_TYPE(ntype); - - rstate->states[R600_CB0__CB_COLOR0_BASE] = rtex->offset[level] >> 8; - rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; - rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | - S_028060_SLICE_TILE_MAX(slice); - rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; - rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; if (radeon_state_pm4(rstate)) { radeon_state_decref(rstate); return NULL; } return rstate; -} -static struct radeon_state *r600_db(struct r600_context *rctx) -{ - struct r600_screen *rscreen = rctx->screen; - struct r600_resource_texture *rtex; - struct r600_resource *rbuffer; - struct radeon_state *rstate; - const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; - unsigned level; - unsigned pitch, slice, format; - - if (state->zsbuf == NULL) - return NULL; - - rstate = radeon_state(rscreen->rw, R600_STATE_DB, 0); - if (rstate == NULL) - return NULL; - - rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tilled = 1; - rtex->array_mode = 2; - rtex->tile_type = 1; - rtex->depth = 1; - rbuffer = &rtex->resource; - - rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rstate->nbo = 1; - rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; - level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; - format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[R600_DB__DB_DEPTH_BASE] = rtex->offset[level] >> 8; - rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) | - S_028010_FORMAT(format); - rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; - rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; - rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | - S_028000_SLICE_TILE_MAX(slice); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; } static struct radeon_state *r600_rasterizer(struct r600_context *rctx) @@ -954,9 +888,8 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx) return rstate; } -static struct radeon_state *r600_viewport(struct r600_context *rctx) +static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state) { - const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; struct radeon_state *rstate; @@ -1366,6 +1299,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) int r600_context_hw_states(struct pipe_context *ctx) { struct r600_context *rctx = r600_context(ctx); + struct r600_resource_texture *rtexture; unsigned i; int r; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; @@ -1377,69 +1311,59 @@ int r600_context_hw_states(struct pipe_context *ctx) /* free previous TODO determine what need to be updated, what * doesn't */ - //radeon_state_decref(rctx->hw_states.config); rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl); - rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db); rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer); rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor); rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa); - rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend); - rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport); - for (i = 0; i < 8; i++) { - rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]); - } - for (i = 0; i < 6; i++) { - rctx->hw_states.ucp[i] = radeon_state_decref(rctx->hw_states.ucp[i]); - } - for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - radeon_state_decref(rctx->hw_states.ps_resource[i]); - rctx->hw_states.ps_resource[i] = NULL; - } - rctx->hw_states.ps_nresource = 0; - for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - radeon_state_decref(rctx->hw_states.ps_sampler[i]); - rctx->hw_states.ps_sampler[i] = NULL; - } - rctx->hw_states.ps_nsampler = 0; /* build new states */ + rctx->hw_states.blend = NULL; + rctx->hw_states.viewport = NULL; + rctx->hw_states.ucp = NULL; rctx->hw_states.rasterizer = r600_rasterizer(rctx); rctx->hw_states.scissor = r600_scissor(rctx); rctx->hw_states.dsa = r600_dsa(rctx); - rctx->hw_states.blend = r600_blend(rctx); - rctx->hw_states.viewport = r600_viewport(rctx); - for (i = 0; i < nr_cbufs; i++) { - rctx->hw_states.cb[i] = r600_cb(rctx, i); + rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + if (rctx->viewport) { + rctx->hw_states.viewport = rctx->viewport->rstate; } - for (i = 0; i < ucp_nclip; i++) { - rctx->hw_states.ucp[i] = r600_ucp(rctx, i); + if (rctx->blend) { + rctx->hw_states.blend = rctx->blend->rstate; } - rctx->hw_states.db = r600_db(rctx); - rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); + if (rctx->clip) { + rctx->hw_states.ucp = rctx->clip->rstate; + } + for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { + rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.cbufs[i]->texture; + rctx->hw_states.cb[i] = rtexture->cb[i][rctx->framebuffer->state.framebuffer.cbufs[i]->level]; + } + if (rctx->framebuffer->state.framebuffer.zsbuf) { + rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.zsbuf->texture; + rctx->hw_states.db = rtexture->db[rctx->framebuffer->state.framebuffer.zsbuf->level]; + } + for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, - &rctx->ps_sampler[i]->state.sampler, - i); + rctx->hw_states.ps_sampler[i] = rctx->ps_sampler[i]->rstate; + } else { + rctx->hw_states.ps_sampler[i] = NULL; } } rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - rctx->hw_states.ps_resource[i] = r600_resource(ctx, - &rctx->ps_sampler_view[i]->state.sampler_view, - i); + rctx->hw_states.ps_resource[i] = rctx->ps_sampler_view[i]->rstate; + } else { + rctx->hw_states.ps_resource[i] = NULL; } } rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; /* bind states */ - for (i = 0; i < ucp_nclip; i++) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp[i]); - if (r) - return r; - } + r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp); + if (r) + return r; r = radeon_draw_set(rctx->draw, rctx->hw_states.db); if (r) return r; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 77d627c..ec1d505 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -128,13 +128,25 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, return &resource->base.b; } +static void r600_texture_destroy_state(struct pipe_resource *ptexture) +{ + struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture; + + for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) { + radeon_state_decref(rtexture->scissor[i]); + radeon_state_decref(rtexture->db[i]); + for (int j = 0; j < 8; j++) { + radeon_state_decref(rtexture->cb[j][i]); + } + } +} + static void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; struct r600_resource *resource = &rtex->resource; struct r600_screen *rscreen = r600_screen(screen); - unsigned i; if (resource->bo) { radeon_bo_decref(rscreen->rw, resource->bo); @@ -142,11 +154,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, if (rtex->uncompressed) { radeon_bo_decref(rscreen->rw, rtex->uncompressed); } - for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) { - radeon_state_decref(rtex->scissor[i]); - radeon_state_decref(rtex->cb0[i]); - radeon_state_decref(rtex->db[i]); - } + r600_texture_destroy_state(ptex); FREE(rtex); } @@ -211,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; resource->bo = bo; + rtex->depth = 0; rtex->pitch_override = whandle->stride; rtex->bpt = util_format_get_blocksize(templ->format); rtex->pitch[0] = whandle->stride; + rtex->width[0] = templ->width0; + rtex->height[0] = templ->height0; rtex->offset[0] = 0; rtex->size = align(rtex->pitch[0] * templ->height0, 64); @@ -696,9 +707,9 @@ static struct radeon_state *r600_texture_state_scissor(struct r600_screen *rscre return rstate; } -static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen, +static struct radeon_state *r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, - unsigned level) + unsigned cb, unsigned level) { struct radeon_state *rstate; struct r600_resource *rbuffer; @@ -707,7 +718,7 @@ static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen, unsigned format, swap, ntype; const struct util_format_description *desc; - rstate = radeon_state(rscreen->rw, R600_STATE_CB0, 0); + rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0); if (rstate == NULL) return NULL; rbuffer = &rtexture->resource; @@ -770,6 +781,10 @@ static struct radeon_state *r600_texture_state_db(struct r600_screen *rscreen, if (rstate == NULL) return NULL; rbuffer = &rtexture->resource; + rtexture->tilled = 1; + rtexture->array_mode = 2; + rtexture->tile_type = 1; + rtexture->depth = 1; /* set states (most default value are 0 and struct already * initialized to 0, thus avoid resetting them) @@ -838,14 +853,14 @@ static struct radeon_state *r600_texture_state_viewport(struct r600_screen *rscr return rstate; } -int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) +int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) { struct r600_screen *rscreen = r600_screen(ctx->screen); - if (rtexture->cb0[level] == NULL) { - rtexture->cb0[level] = r600_texture_state_cb0(rscreen, rtexture, level); - if (rtexture->cb0[level] == NULL) { - R600_ERR("failed to create cb0 state for texture\n"); + if (rtexture->cb[cb][level] == NULL) { + rtexture->cb[cb][level] = r600_texture_state_cb(rscreen, rtexture, cb, level); + if (rtexture->cb[cb][level] == NULL) { + R600_ERR("failed to create cb%d state for texture\n", cb); return -ENOMEM; } } diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 046c264..3f1ca95 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -109,13 +109,11 @@ struct radeon_state { unsigned id; unsigned shader_index; unsigned nstates; - u32 *states; + u32 states[64]; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 *pm4; - u32 nimmd; - u32 *immd; + u32 pm4[128]; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -130,6 +128,7 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 type, u32 id struct radeon_state *radeon_state_incref(struct radeon_state *state); struct radeon_state *radeon_state_decref(struct radeon_state *state); int radeon_state_pm4(struct radeon_state *state); +int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type); /* * draw functions @@ -219,7 +218,7 @@ enum r600_stype { R600_STATE_DB, R600_STATE_QUERY_BEGIN, R600_STATE_QUERY_END, - R600_STATE_CLIP, + R600_STATE_UCP, R600_STATE_VGT, R600_STATE_DRAW, }; @@ -613,17 +612,37 @@ enum { /* R600_DRAW */ #define R600_DRAW__VGT_NUM_INDICES 0 #define R600_DRAW__VGT_DMA_BASE_HI 1 -#define R600_DRAW__VGT_DMA_BASE 2 +#define R600_DRAW__VGT_DMA_BASE 2 #define R600_DRAW__VGT_DRAW_INITIATOR 3 -#define R600_DRAW_SIZE 4 -#define R600_DRAW_PM4 128 +#define R600_DRAW_SIZE 4 +#define R600_DRAW_PM4 128 /* R600_CLIP */ -#define R600_CLIP__PA_CL_UCP_X_0 0 -#define R600_CLIP__PA_CL_UCP_Y_0 1 -#define R600_CLIP__PA_CL_UCP_Z_0 2 -#define R600_CLIP__PA_CL_UCP_W_0 3 -#define R600_CLIP_SIZE 4 -#define R600_CLIP_PM4 128 +#define R600_CLIP__PA_CL_UCP_X_0 0 +#define R600_CLIP__PA_CL_UCP_Y_0 1 +#define R600_CLIP__PA_CL_UCP_Z_0 2 +#define R600_CLIP__PA_CL_UCP_W_0 3 +#define R600_CLIP__PA_CL_UCP_X_1 4 +#define R600_CLIP__PA_CL_UCP_Y_1 5 +#define R600_CLIP__PA_CL_UCP_Z_1 6 +#define R600_CLIP__PA_CL_UCP_W_1 7 +#define R600_CLIP__PA_CL_UCP_X_2 8 +#define R600_CLIP__PA_CL_UCP_Y_2 9 +#define R600_CLIP__PA_CL_UCP_Z_2 10 +#define R600_CLIP__PA_CL_UCP_W_2 11 +#define R600_CLIP__PA_CL_UCP_X_3 12 +#define R600_CLIP__PA_CL_UCP_Y_3 13 +#define R600_CLIP__PA_CL_UCP_Z_3 14 +#define R600_CLIP__PA_CL_UCP_W_3 15 +#define R600_CLIP__PA_CL_UCP_X_4 16 +#define R600_CLIP__PA_CL_UCP_Y_4 17 +#define R600_CLIP__PA_CL_UCP_Z_4 18 +#define R600_CLIP__PA_CL_UCP_W_4 19 +#define R600_CLIP__PA_CL_UCP_X_5 20 +#define R600_CLIP__PA_CL_UCP_Y_5 21 +#define R600_CLIP__PA_CL_UCP_Z_5 22 +#define R600_CLIP__PA_CL_UCP_W_5 23 +#define R600_CLIP_SIZE 24 +#define R600_CLIP_PM4 128 /* R600 QUERY BEGIN/END */ #define R600_QUERY__OFFSET 0 #define R600_QUERY_SIZE 1 diff --git a/src/gallium/winsys/r600/drm/r600_state.c b/src/gallium/winsys/r600/drm/r600_state.c index e3d0116..f6a428e 100644 --- a/src/gallium/winsys/r600/drm/r600_state.c +++ b/src/gallium/winsys/r600/drm/r600_state.c @@ -80,7 +80,7 @@ struct radeon_stype_info r600_stypes[] = { { R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) }, { R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) }, { R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) }, - { R600_STATE_CLIP, 6, 0, r600_state_pm4_generic, SUB_NONE(UCP) }, + { R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) }, { R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) }, { R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) }, }; @@ -381,13 +381,6 @@ static int r600_state_pm4_draw(struct radeon_state *state) if (r) return r; state->pm4[state->cpm4++] = state->bo[0]->handle; - } else if (state->nimmd) { - state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_IMMD, state->nimmd + 1); - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES]; - state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR]; - for (i = 0; i < state->nimmd; i++) { - state->pm4[state->cpm4++] = state->immd[i]; - } } else { state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES]; diff --git a/src/gallium/winsys/r600/drm/r600_states.h b/src/gallium/winsys/r600/drm/r600_states.h index 51b69b9..09d79d4 100644 --- a/src/gallium/winsys/r600/drm/r600_states.h +++ b/src/gallium/winsys/r600/drm/r600_states.h @@ -284,10 +284,30 @@ static const struct radeon_register R600_names_VS_CONSTANT[] = { }; static const struct radeon_register R600_names_UCP[] = { - {0x00028e20, 0, 0, "PA_CL_UCP0_X"}, - {0x00028e24, 0, 0, "PA_CL_UCP0_Y"}, - {0x00028e28, 0, 0, "PA_CL_UCP0_Z"}, - {0x00028e2c, 0, 0, "PA_CL_UCP0_W"}, + {0x00028E20, 0, 0, "PA_CL_UCP0_X"}, + {0x00028E24, 0, 0, "PA_CL_UCP0_Y"}, + {0x00028E28, 0, 0, "PA_CL_UCP0_Z"}, + {0x00028E2C, 0, 0, "PA_CL_UCP0_W"}, + {0x00028E30, 0, 0, "PA_CL_UCP1_X"}, + {0x00028E34, 0, 0, "PA_CL_UCP1_Y"}, + {0x00028E38, 0, 0, "PA_CL_UCP1_Z"}, + {0x00028E3C, 0, 0, "PA_CL_UCP1_W"}, + {0x00028E40, 0, 0, "PA_CL_UCP2_X"}, + {0x00028E44, 0, 0, "PA_CL_UCP2_Y"}, + {0x00028E48, 0, 0, "PA_CL_UCP2_Z"}, + {0x00028E4C, 0, 0, "PA_CL_UCP2_W"}, + {0x00028E50, 0, 0, "PA_CL_UCP3_X"}, + {0x00028E54, 0, 0, "PA_CL_UCP3_Y"}, + {0x00028E58, 0, 0, "PA_CL_UCP3_Z"}, + {0x00028E5C, 0, 0, "PA_CL_UCP3_W"}, + {0x00028E60, 0, 0, "PA_CL_UCP4_X"}, + {0x00028E64, 0, 0, "PA_CL_UCP4_Y"}, + {0x00028E68, 0, 0, "PA_CL_UCP4_Z"}, + {0x00028E6C, 0, 0, "PA_CL_UCP4_W"}, + {0x00028E70, 0, 0, "PA_CL_UCP5_X"}, + {0x00028E74, 0, 0, "PA_CL_UCP5_Y"}, + {0x00028E78, 0, 0, "PA_CL_UCP5_Z"}, + {0x00028E7C, 0, 0, "PA_CL_UCP5_W"}, }; static const struct radeon_register R600_names_PS_RESOURCE[] = { diff --git a/src/gallium/winsys/r600/drm/radeon_priv.h b/src/gallium/winsys/r600/drm/radeon_priv.h index 66ee5f2..af5319e 100644 --- a/src/gallium/winsys/r600/drm/radeon_priv.h +++ b/src/gallium/winsys/r600/drm/radeon_priv.h @@ -38,19 +38,19 @@ struct radeon_register { }; struct radeon_sub_type { - int shader_type; - const struct radeon_register *regs; - unsigned nstates; + int shader_type; + const struct radeon_register *regs; + unsigned nstates; }; struct radeon_stype_info { - unsigned stype; - unsigned num; - unsigned stride; - radeon_state_pm4_t pm4; - struct radeon_sub_type reginfo[R600_SHADER_MAX]; - unsigned base_id; - unsigned npm4; + unsigned stype; + unsigned num; + unsigned stride; + radeon_state_pm4_t pm4; + struct radeon_sub_type reginfo[R600_SHADER_MAX]; + unsigned base_id; + unsigned npm4; }; struct radeon { diff --git a/src/gallium/winsys/r600/drm/radeon_state.c b/src/gallium/winsys/r600/drm/radeon_state.c index ef09fdf..d4e622c 100644 --- a/src/gallium/winsys/r600/drm/radeon_state.c +++ b/src/gallium/winsys/r600/drm/radeon_state.c @@ -80,15 +80,59 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 stype, u32 i state->refcount = 1; state->npm4 = found->npm4; state->nstates = found->reginfo[shader_index].nstates; - state->states = calloc(1, state->nstates * 4); - state->pm4 = calloc(1, found->npm4 * 4); - if (state->states == NULL || state->pm4 == NULL) { - radeon_state_decref(state); - return NULL; - } return state; } +int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type) +{ + struct radeon_stype_info *found = NULL; + int i, j, shader_index = -1; + + if (state == NULL) + return 0; + /* traverse the stype array */ + for (i = 0; i < state->radeon->nstype; i++) { + /* if the type doesn't match, if the shader doesn't match */ + if (stype != state->radeon->stype[i].stype) + continue; + if (shader_type) { + for (j = 0; j < 4; j++) { + if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) { + shader_index = j; + break; + } + } + if (shader_index == -1) + continue; + } else { + if (state->radeon->stype[i].reginfo[0].shader_type) + continue; + else + shader_index = 0; + } + if (id > state->radeon->stype[i].num) + continue; + + found = &state->radeon->stype[i]; + break; + } + + if (!found) { + fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type); + return -EINVAL; + } + + if (found->reginfo[shader_index].nstates != state->nstates) { + fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n", + state->stype->stype, state->id, state->shader_index, stype, id, shader_index); + } + + state->stype = found; + state->id = id; + state->shader_index = shader_index; + return radeon_state_pm4(state); +} + struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id) { return radeon_state_shader(radeon, type, id, 0); @@ -134,9 +178,6 @@ struct radeon_state *radeon_state_decref(struct radeon_state *state) for (i = 0; i < state->nbo; i++) { state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]); } - free(state->immd); - free(state->states); - free(state->pm4); memset(state, 0, sizeof(*state)); free(state); return NULL; @@ -179,8 +220,9 @@ int radeon_state_pm4(struct radeon_state *state) { int r; - if (state == NULL || state->cpm4) + if (state == NULL) return 0; + state->cpm4 = 0; r = state->stype->pm4(state); if (r) { fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n", -- 2.7.4