From 0813e58a3e41faf6f2072d034dfdc6198a3a1fee Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 30 Jan 2012 06:21:07 +0100 Subject: [PATCH] r600g: rework cache flushing This also significantly improves the RV670 flush by using the CB1 flush *always* and also DEST_BASE_0_ENA, which appears to magically fix some tests. I am not entirely sure, but it's possible that RV670 flushing is fixed completely. v2: fix cayman by flushing texture cache instead of vertex cache Thanks to Dave Airlie for testing Cayman. --- src/gallium/drivers/r600/evergreen_hw_context.c | 80 +++-------- src/gallium/drivers/r600/evergreen_state.c | 22 +-- src/gallium/drivers/r600/r600.h | 7 +- src/gallium/drivers/r600/r600_hw_context.c | 174 +++++++++++------------- src/gallium/drivers/r600/r600_pipe.c | 2 + src/gallium/drivers/r600/r600_pipe.h | 10 ++ src/gallium/drivers/r600/r600_state.c | 16 +-- src/gallium/drivers/r600/r600_state_common.c | 71 ++++++++++ src/gallium/drivers/r600/r600d.h | 1 + 9 files changed, 193 insertions(+), 190 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index f7fcf37..118ba26 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -274,14 +274,14 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, {R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0}, {R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0}, {R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0}, {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0}, - {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0}, {R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0}, {R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0}, - {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0}, {R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0}, {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0}, {R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, @@ -294,10 +294,10 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0}, {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0}, {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0}, - {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0}, + {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0}, {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, @@ -642,14 +642,14 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, {R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0}, {R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0}, {R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0}, {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0}, - {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0}, {R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0}, {R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0}, - {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0}, {R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0}, {CM_R_0288E8_SQ_LDS_ALLOC, 0, 0, 0}, {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0}, @@ -663,10 +663,10 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0}, {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0}, {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0}, - {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0}, + {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0}, {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, @@ -832,8 +832,8 @@ static const struct r600_reg cayman_context_reg_list[] = { static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { - {R_030000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, - {R_030004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_030000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0, 0}, + {R_030004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0, 0}, {R_030008_RESOURCE0_WORD2, 0, 0, 0}, {R_03000C_RESOURCE0_WORD3, 0, 0, 0}, {R_030010_RESOURCE0_WORD4, 0, 0, 0}, @@ -1188,54 +1188,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr cs->cdw += ndwords; } -void evergreen_context_flush_dest_caches(struct r600_context *ctx) -{ - struct r600_resource *cb[12]; - struct r600_resource *db; - - if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) - return; - - /* find number of color buffer */ - db = r600_context_reg_bo(ctx, R_028048_DB_Z_READ_BASE); - cb[0] = r600_context_reg_bo(ctx, R_028C60_CB_COLOR0_BASE); - cb[1] = r600_context_reg_bo(ctx, R_028C9C_CB_COLOR1_BASE); - cb[2] = r600_context_reg_bo(ctx, R_028CD8_CB_COLOR2_BASE); - cb[3] = r600_context_reg_bo(ctx, R_028D14_CB_COLOR3_BASE); - cb[4] = r600_context_reg_bo(ctx, R_028D50_CB_COLOR4_BASE); - cb[5] = r600_context_reg_bo(ctx, R_028D8C_CB_COLOR5_BASE); - cb[6] = r600_context_reg_bo(ctx, R_028DC8_CB_COLOR6_BASE); - cb[7] = r600_context_reg_bo(ctx, R_028E04_CB_COLOR7_BASE); - cb[8] = r600_context_reg_bo(ctx, R_028E40_CB_COLOR8_BASE); - cb[9] = r600_context_reg_bo(ctx, R_028E5C_CB_COLOR9_BASE); - cb[10] = r600_context_reg_bo(ctx, R_028E78_CB_COLOR10_BASE); - cb[11] = r600_context_reg_bo(ctx, R_028E94_CB_COLOR11_BASE); - - /* flush color buffer */ - for (int i = 0; i < 12; i++) { - if (cb[i]) { - unsigned flush; - - if (i > 7) { - flush = (S_0085F0_CB8_DEST_BASE_ENA(1) << (i - 8)) | - S_0085F0_CB_ACTION_ENA(1); - } else { - flush = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) | - S_0085F0_CB_ACTION_ENA(1); - } - r600_context_bo_flush(ctx, flush, 0, cb[i]); - } - } - if (db) { - r600_context_bo_flush(ctx, - S_0085F0_DB_ACTION_ENA(1) | - S_0085F0_DB_DEST_BASE_ENA(1), - 0, db); - } - - ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; -} - void evergreen_flush_vgt_streamout(struct r600_context *ctx) { struct radeon_winsys_cs *cs = ctx->cs; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 3131f56..bd46072 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1176,6 +1176,8 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + if (count) + r600_inval_texture_cache(rctx); memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; @@ -1190,6 +1192,9 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + if (count) + r600_inval_texture_cache(rctx); + for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_vs_sampler(rctx, rstates[i], i); } @@ -1525,7 +1530,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - evergreen_context_flush_dest_caches(rctx); + r600_flush_framebuffer(rctx, false); rctx->num_dest_buffers = state->nr_cbufs; /* unreference old buffer and reference new one */ @@ -1618,19 +1623,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } -static void evergreen_texture_barrier(struct pipe_context *ctx) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_context_flush_all(rctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | - S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | - S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | - S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | - S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) | - S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) | - S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1)); -} - void evergreen_init_state_functions(struct r600_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -1671,7 +1663,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; - rctx->context.texture_barrier = evergreen_texture_barrier; + rctx->context.texture_barrier = r600_texture_barrier; rctx->context.create_stream_output_target = r600_create_so_target; rctx->context.stream_output_target_destroy = r600_so_target_destroy; rctx->context.set_stream_output_targets = r600_set_so_targets; diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 06eb9d1..b5d2a89 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -237,8 +237,10 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int flag_wait); void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence, unsigned offset, unsigned value); -void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags); -void r600_context_flush_dest_caches(struct r600_context *ctx); +void r600_inval_shader_cache(struct r600_context *ctx); +void r600_inval_texture_cache(struct r600_context *ctx); +void r600_inval_vertex_cache(struct r600_context *ctx); +void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now); void r600_context_streamout_begin(struct r600_context *ctx); void r600_context_streamout_end(struct r600_context *ctx); @@ -249,7 +251,6 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 int evergreen_context_init(struct r600_context *ctx); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); -void evergreen_context_flush_dest_caches(struct r600_context *ctx); void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 3399466..d571df7 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -416,10 +416,10 @@ static const struct r600_reg r600_context_reg_list[] = { {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, - {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, - {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0}, + {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0}, + {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0}, {R_02823C_CB_SHADER_MASK, 0, 0, 0}, {R_028238_CB_TARGET_MASK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, @@ -587,11 +587,11 @@ static const struct r600_reg r600_context_reg_list[] = { {R_028638_SPI_VS_OUT_ID_9, 0, 0, 0}, {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, - {R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, - {R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0}, {R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0}, @@ -632,7 +632,7 @@ static const struct r600_reg r600_context_reg_list[] = { {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, {R_0286D8_SPI_INPUT_Z, 0, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0}, {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0}, {R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0}, @@ -675,8 +675,8 @@ int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsig static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { - {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, - {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0, 0}, + {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0, 0}, {R_038008_RESOURCE0_WORD2, 0, 0, 0}, {R_03800C_RESOURCE0_WORD3, 0, 0, 0}, {R_038010_RESOURCE0_WORD4, 0, 0, 0}, @@ -971,20 +971,6 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, } } -/* Flushes all surfaces */ -void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - r600_need_cs_space(ctx, 5, FALSE); - - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); - cs->buf[cs->cdw++] = flush_flags; /* CP_COHER_CNTL */ - cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ - cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ - cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ -} - void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_resource *bo) { @@ -997,38 +983,14 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, return; } - if ((ctx->screen->family < CHIP_RV770) && - (G_0085F0_CB_ACTION_ENA(flush_flags) || - G_0085F0_DB_ACTION_ENA(flush_flags))) { - if (ctx->flags & R600_CONTEXT_CHECK_EVENT_FLUSH) { - /* the rv670 seems to fail fbo-generatemipmap unless we flush the CB1 dest base ena */ - if ((bo->cs_buf->binding & BO_BOUND_TEXTURE) && - (flush_flags & S_0085F0_CB_ACTION_ENA(1))) { - if ((ctx->screen->family == CHIP_RV670) || - (ctx->screen->family == CHIP_RS780) || - (ctx->screen->family == CHIP_RS880)) { - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); - cs->buf[cs->cdw++] = S_0085F0_CB1_DEST_BASE_ENA(1); /* CP_COHER_CNTL */ - cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ - cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ - cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ - } - } - - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); - ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; - } - } else { - va = r600_resource_va(&ctx->screen->screen, (void *)bo); - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); - cs->buf[cs->cdw++] = flush_flags; - cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8; - cs->buf[cs->cdw++] = va >> 8; - cs->buf[cs->cdw++] = 0x0000000A; - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); - } + va = r600_resource_va(&ctx->screen->screen, (void *)bo); + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + cs->buf[cs->cdw++] = flush_flags; + cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8; + cs->buf[cs->cdw++] = va >> 8; + cs->buf[cs->cdw++] = 0x0000000A; + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); bo->cs_buf->last_flush = (bo->cs_buf->last_flush | flush_flags) & flush_mask; } @@ -1387,43 +1349,6 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 LIST_DELINIT(&block->list); } -void r600_context_flush_dest_caches(struct r600_context *ctx) -{ - struct r600_resource *cb[8]; - struct r600_resource *db; - int i; - - if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) - return; - - db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE); - cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE); - cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE); - cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE); - cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE); - cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE); - cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE); - cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE); - cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE); - - ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; - /* flush the color buffers */ - for (i = 0; i < 8; i++) { - if (!cb[i]) - continue; - - r600_context_bo_flush(ctx, - (S_0085F0_CB0_DEST_BASE_ENA(1) << i) | - S_0085F0_CB_ACTION_ENA(1), - 0, cb[i]); - } - if (db) { - r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1), 0, db); - } - ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; - ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; -} - void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) { struct radeon_winsys_cs *cs = ctx->cs; @@ -1483,6 +1408,66 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) cs->cdw += ndwords; } +void r600_inval_shader_cache(struct r600_context *ctx) +{ + ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ACTION_ENA(1); + r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); +} + +void r600_inval_texture_cache(struct r600_context *ctx) +{ + ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); + r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); +} + +void r600_inval_vertex_cache(struct r600_context *ctx) +{ + if (ctx->family == CHIP_RV610 || + ctx->family == CHIP_RV620 || + ctx->family == CHIP_RS780 || + ctx->family == CHIP_RS880 || + ctx->family == CHIP_RV710 || + ctx->family == CHIP_CEDAR || + ctx->family == CHIP_PALM || + ctx->family == CHIP_SUMO || + ctx->family == CHIP_SUMO2 || + ctx->family == CHIP_CAICOS || + ctx->family == CHIP_CAYMAN) { + /* Some GPUs don't have the vertex cache and must use the texture cache instead. */ + ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); + } else { + ctx->atom_surface_sync.flush_flags |= S_0085F0_VC_ACTION_ENA(1); + } + r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); +} + +void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) +{ + if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) + return; + + ctx->atom_surface_sync.flush_flags |= + r600_get_cb_flush_flags(ctx) | + (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0); + + if (flush_now) { + r600_emit_atom(ctx, &ctx->atom_surface_sync.atom); + } else { + r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); + } + + /* Also add a complete cache flush to work around broken flushing on R6xx. */ + if (ctx->chip_class == R600) { + if (flush_now) { + r600_emit_atom(ctx, &ctx->atom_r6xx_flush_and_inv); + } else { + r600_atom_dirty(ctx, &ctx->atom_r6xx_flush_and_inv); + } + } + + ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; +} + void r600_context_flush(struct r600_context *ctx, unsigned flags) { struct radeon_winsys_cs *cs = ctx->cs; @@ -1504,10 +1489,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) streamout_suspended = true; } - if (ctx->screen->chip_class >= EVERGREEN) - evergreen_context_flush_dest_caches(ctx); - else - r600_context_flush_dest_caches(ctx); + r600_flush_framebuffer(ctx, true); /* partial flush is needed to avoid lockups on some chips with user fences */ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index b927625..351ecfe 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -232,6 +232,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.create_video_decoder = vl_create_decoder; rctx->context.create_video_buffer = vl_video_buffer_create; + r600_init_common_atoms(rctx); + switch (rctx->chip_class) { case R600: case R700: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 78b6d83..c327954 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -67,6 +67,11 @@ struct r600_atom { struct list_head head; }; +struct r600_atom_surface_sync { + struct r600_atom atom; + unsigned flush_flags; /* CP_COHER_CNTL */ +}; + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -272,6 +277,8 @@ struct r600_context { /* States based on r600_state. */ struct list_head dirty_states; + struct r600_atom_surface_sync atom_surface_sync; + struct r600_atom atom_r6xx_flush_and_inv; /* Below are variables from the old r600_context. */ @@ -426,6 +433,9 @@ void r600_translate_index_buffer(struct r600_context *r600, unsigned count); /* r600_state_common.c */ +void r600_init_common_atoms(struct r600_context *rctx); +unsigned r600_get_cb_flush_flags(struct r600_context *rctx); +void r600_texture_barrier(struct pipe_context *ctx); void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib); void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f2e0bfc..7b3d813 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1176,6 +1176,9 @@ static void r600_set_sampler_views(struct r600_context *rctx, struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views; unsigned i; + if (count) + r600_inval_texture_cache(rctx); + for (i = 0; i < count; i++) { if (rviews[i]) { if (((struct r600_resource_texture *)rviews[i]->base.texture)->depth) @@ -1610,7 +1613,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - r600_context_flush_dest_caches(rctx); + r600_flush_framebuffer(rctx, false); rctx->num_dest_buffers = state->nr_cbufs; /* unreference old buffer and reference new one */ @@ -1700,17 +1703,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } -static void r600_texture_barrier(struct pipe_context *ctx) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_context_flush_all(rctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | - S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | - S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | - S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | - S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1)); -} - void r600_init_state_functions(struct r600_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 0c06ad0..7dae397 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -34,6 +34,70 @@ #include "r600_pipe.h" #include "r600d.h" +static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = rctx->cs; + struct r600_atom_surface_sync *a = (struct r600_atom_surface_sync*)atom; + + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + cs->buf[cs->cdw++] = a->flush_flags; /* CP_COHER_CNTL */ + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ + + a->flush_flags = 0; +} + +static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = rctx->cs; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); +} + +static void r600_init_atom(struct r600_atom *atom, + void (*emit)(struct r600_context *ctx, struct r600_atom *state), + unsigned num_dw, + enum r600_atom_flags flags) +{ + atom->emit = emit; + atom->num_dw = num_dw; + atom->flags = flags; +} + +void r600_init_common_atoms(struct r600_context *rctx) +{ + r600_init_atom(&rctx->atom_surface_sync.atom, r600_emit_surface_sync, 5, EMIT_EARLY); + r600_init_atom(&rctx->atom_r6xx_flush_and_inv, r600_emit_r6xx_flush_and_inv, 2, EMIT_EARLY); +} + +unsigned r600_get_cb_flush_flags(struct r600_context *rctx) +{ + unsigned flags = 0; + + if (rctx->framebuffer.nr_cbufs) { + flags |= S_0085F0_CB_ACTION_ENA(1) | + (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT); + } + + /* Workaround for broken flushing on some R6xx chipsets. */ + if (rctx->screen->family == CHIP_RV670 || + rctx->screen->family == CHIP_RS780 || + rctx->screen->family == CHIP_RS880) { + flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_DEST_BASE_0_ENA(1); + } + return flags; +} + +void r600_texture_barrier(struct pipe_context *ctx) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + rctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx); + r600_atom_dirty(rctx, &rctx->atom_surface_sync.atom); +} + static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim) { static const int prim_conv[] = { @@ -226,6 +290,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { + r600_inval_shader_cache(rctx); u_vbuf_bind_vertex_elements(rctx->vbuf_mgr, state, v->vmgr_elements); @@ -333,6 +398,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->ps_shader = (struct r600_pipe_shader *)state; if (state) { + r600_inval_shader_cache(rctx); r600_context_pipe_state_set(rctx, &rctx->ps_shader->rstate); rctx->cb_color_control &= C_028808_MULTIWRITE_ENABLE; @@ -350,6 +416,7 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) /* TODO delete old shader */ rctx->vs_shader = (struct r600_pipe_shader *)state; if (state) { + r600_inval_shader_cache(rctx); r600_context_pipe_state_set(rctx, &rctx->vs_shader->rstate); } if (rctx->ps_shader && rctx->vs_shader) { @@ -416,6 +483,8 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, return; } + r600_inval_shader_cache(rctx); + r600_upload_const_buffer(rctx, &rbuffer, &offset); va_offset = r600_resource_va(ctx->screen, (void*)rbuffer); va_offset += offset; @@ -558,6 +627,8 @@ static void r600_vertex_buffer_update(struct r600_context *rctx) struct pipe_vertex_buffer *vertex_buffer; unsigned i, count, offset; + r600_inval_vertex_cache(rctx); + if (rctx->vertex_elements->vbuffer_need_offset) { /* one resource per vertex elements */ count = rctx->vertex_elements->count; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index e2a526f..3c3238a 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3270,6 +3270,7 @@ #define S_0085F0_SO3_DEST_BASE_ENA(x) (((x) & 0x1) << 5) #define G_0085F0_SO3_DEST_BASE_ENA(x) (((x) >> 5) & 0x1) #define C_0085F0_SO3_DEST_BASE_ENA 0xFFFFFFDF +#define S_0085F0_CB0_DEST_BASE_ENA_SHIFT 6 #define S_0085F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6) #define G_0085F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1) #define C_0085F0_CB0_DEST_BASE_ENA 0xFFFFFFBF -- 2.7.4