From: Tom Stellard Date: Tue, 24 Jul 2012 16:59:05 +0000 (+0000) Subject: r600g: Emit dispatch state for compute directly to the cs X-Git-Tag: accepted/2.0alpha-wayland/20121114.171706~945 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fdd8df20e4a730f80bf4c331012d832bffd7072e;p=profile%2Fivi%2Fmesa.git r600g: Emit dispatch state for compute directly to the cs We no longer rely on an evergreen_compute_resource for emitting dispatch state. Reviewed-by: Marek Olšák --- diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 5e43fae..0d6eb4e 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -249,37 +249,21 @@ void evergreen_compute_upload_input( shader->input_size, 0); } -void evergreen_direct_dispatch( - struct pipe_context *ctx_, +static void evergreen_emit_direct_dispatch( + struct r600_context *rctx, const uint *block_layout, const uint *grid_layout) { - /* This struct r600_context* must be called rctx, because the - * r600_pipe_state_add_reg macro assumes there is a local variable - * of type struct r600_context* called rctx. 
- */ - struct r600_context *rctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; - int i; - - struct evergreen_compute_resource* res = get_empty_res(shader, - COMPUTE_RESOURCE_DISPATCH, 0); - - /* Set CB_TARGET_MASK */ - evergreen_reg_set(res, R_028238_CB_TARGET_MASK, rctx->compute_cb_target_mask); - - evergreen_reg_set(res, R_00899C_VGT_COMPUTE_START_X, 0); - evergreen_reg_set(res, R_0089A0_VGT_COMPUTE_START_Y, 0); - evergreen_reg_set(res, R_0089A4_VGT_COMPUTE_START_Z, 0); - - evergreen_reg_set(res, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, block_layout[0]); - evergreen_reg_set(res, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y, block_layout[1]); - evergreen_reg_set(res, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z, block_layout[2]); - + struct radeon_winsys_cs *cs = rctx->cs; + unsigned num_waves; + unsigned num_pipes = rctx->screen->info.r600_max_pipes; + unsigned wave_divisor = (16 * num_pipes); int group_size = 1; - int grid_size = 1; + /* XXX: Enable lds and get size from cs_shader_state */ + unsigned lds_size = 0; + /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { group_size *= block_layout[i]; } @@ -288,18 +272,50 @@ void evergreen_direct_dispatch( grid_size *= grid_layout[i]; } - evergreen_reg_set(res, R_008970_VGT_NUM_INDICES, group_size); - evergreen_reg_set(res, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, group_size); + /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */ + num_waves = (block_layout[0] * block_layout[1] * block_layout[2] + + wave_divisor - 1) / wave_divisor; + + COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n", + num_pipes, num_waves); - evergreen_emit_raw_value(res, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0)); - evergreen_emit_raw_value(res, grid_layout[0]); - evergreen_emit_raw_value(res, grid_layout[1]); - evergreen_emit_raw_value(res, grid_layout[2]); - ///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN - evergreen_emit_raw_value(res, 1); + /* XXX: Partition the LDS 
between PS/CS. By default half (4096 dwords + * on Evergreen) goes to Pixel Shaders and half goes to Compute Shaders. + * We may need to allocate the entire LDS space for Compute Shaders. + * + * EG: R_008E2C_SQ_LDS_RESOURCE_MGMT := S_008E2C_NUM_LS_LDS(lds_dwords) + * CM: CM_R_0286FC_SPI_LDS_MGMT := S_0286FC_NUM_LS_LDS(lds_dwords) + */ + + r600_write_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size); + + r600_write_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3); + r600_write_value(cs, 0); /* R_00899C_VGT_COMPUTE_START_X */ + r600_write_value(cs, 0); /* R_0089A0_VGT_COMPUTE_START_Y */ + r600_write_value(cs, 0); /* R_0089A4_VGT_COMPUTE_START_Z */ + + r600_write_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, + group_size); + + r600_write_compute_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3); + r600_write_value(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */ + r600_write_value(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */ + r600_write_value(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */ + + r600_write_compute_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC, + lds_size | (num_waves << 14)); + + /* Dispatch packet */ + r600_write_value(cs, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0)); + r600_write_value(cs, grid_layout[0]); + r600_write_value(cs, grid_layout[1]); + r600_write_value(cs, grid_layout[2]); + /* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */ + r600_write_value(cs, 1); } -static void compute_emit_cs(struct r600_context *ctx) +static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, + const uint *grid_layout) { struct radeon_winsys_cs *cs = ctx->cs; int i; @@ -330,6 +346,11 @@ static void compute_emit_cs(struct r600_context *ctx) cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE); + /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ + r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK, + 
ctx->compute_cb_target_mask); + + /* Emit vertex buffer state */ ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask); r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom); @@ -370,6 +391,9 @@ static void compute_emit_cs(struct r600_context *ctx) } } + /* Emit dispatch state and dispatch packet */ + evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout); + /* r600_flush_framebuffer() updates the cb_flush_flags and then * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits * a SURFACE_SYNC packet via r600_emit_surface_sync(). @@ -438,24 +462,12 @@ static void evergreen_launch_grid( const uint *block_layout, const uint *grid_layout, uint32_t pc, const void *input) { - COMPUTE_DBG("PC: %i\n", pc); - struct r600_context *ctx = (struct r600_context *)ctx_; - unsigned num_waves; - unsigned num_pipes = ctx->screen->info.r600_max_pipes; - unsigned wave_divisor = (16 * num_pipes); - - /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */ - num_waves = (block_layout[0] * block_layout[1] * block_layout[2] + - wave_divisor - 1) / wave_divisor; - COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n", - num_pipes, num_waves); + COMPUTE_DBG("PC: %i\n", pc); - evergreen_set_lds(ctx->cs_shader_state.shader, 0, 0, num_waves); evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); - evergreen_direct_dispatch(ctx_, block_layout, grid_layout); - compute_emit_cs(ctx); + compute_emit_cs(ctx, block_layout, grid_layout); } static void evergreen_set_compute_resources(struct pipe_context * ctx_, diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index df3fb83..f29d91b 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -34,7 +34,6 @@ struct evergreen_compute_resource; void *evergreen_create_compute_state(struct pipe_context *ctx, const const 
struct pipe_compute_state *cso); void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); -void evergreen_direct_dispatch( struct pipe_context *context, const uint *block_layout, const uint *grid_layout); void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input); void evergreen_init_atom_start_compute_cs(struct r600_context *rctx); void evergreen_init_compute_state_functions(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 1d11bab..ac884b4 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -301,25 +301,6 @@ void evergreen_set_rat( r600_context_pipe_state_set(pipe->ctx, state); } -void evergreen_set_lds( - struct r600_pipe_compute *pipe, - int num_lds, - int size, - int num_waves) -{ - struct evergreen_compute_resource* res = - get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0); - - if (pipe->ctx->chip_class < CAYMAN) { - evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT, - S_008E2C_NUM_LS_LDS(num_lds)); - } else { - evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT, - S_0286FC_NUM_LS_LDS(num_lds)); - } - evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14); -} - void evergreen_set_gds( struct r600_pipe_compute *pipe, uint32_t addr, diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 5fa9c48..2bef261 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -102,7 +102,6 @@ int get_compute_resource_num(void); #define evergreen_mult_reg_set(res, index, array) evergreen_mult_reg_set_(res, index, array, sizeof(array)) void evergreen_set_rat(struct r600_pipe_compute *pipe, int id, struct r600_resource* bo, int start, int size); 
-void evergreen_set_lds(struct r600_pipe_compute *pipe, int num_lds, int size, int num_waves); void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t size); void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size); void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);