const uint32_t *clear_value, unsigned flags,
enum si_coherency coher)
{
- struct pipe_context *ctx = &sctx->b;
-
assert(dst_offset % 4 == 0);
assert(size % 4 == 0);
unsigned size_12 = DIV_ROUND_UP(size, 12);
struct pipe_grid_info info = {0};
if (!sctx->cs_clear_12bytes_buffer)
- sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(ctx);
+ sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx);
info.block[0] = 64;
info.last_block[0] = size_12 % 64;
bool dst_stream_cache_policy, bool is_copy);
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type);
-void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx);
+void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array);
void *si_create_query_result_cs(struct si_context *sctx);
void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
return create_shader_state(sctx, b.shader);
}
+
+void *si_clear_12bytes_buffer_shader(struct si_context *sctx)
+{
+ const nir_shader_compiler_options *options =
+ sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_12bytes_buffer");
+ b.shader->info.workgroup_size[0] = 64;
+ b.shader->info.workgroup_size[1] = 1;
+ b.shader->info.workgroup_size[2] = 1;
+ b.shader->info.cs.user_data_components_amd = 3;
+
+ nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12);
+ nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3);
+
+ nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset,
+ .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_NON_TEMPORAL : 0);
+
+ return create_shader_state(sctx, b.shader);
+}
return sctx->b.create_compute_state(&sctx->b, &state);
}
-void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx)
-{
- static const char text[] = "COMP\n"
- "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
- "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
- "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
- "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n"
- "DCL SV[0], THREAD_ID\n"
- "DCL SV[1], BLOCK_ID\n"
- "DCL SV[2], CS_USER_DATA_AMD\n"
- "DCL BUFFER[0]\n"
- "DCL TEMP[0..0]\n"
- "IMM[0] UINT32 {64, 1, 12, 0}\n"
- "UMAD TEMP[0].x, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n"
- "UMUL TEMP[0].x, TEMP[0].xyzz, IMM[0].zzzz\n" // 12 bytes
- "STORE BUFFER[0].xyz, TEMP[0].xxxx, SV[2].xyzz%s\n"
- "END\n";
- char final_text[2048];
- struct tgsi_token tokens[1024];
- struct pipe_compute_state state = {0};
-
- snprintf(final_text, sizeof(final_text), text,
- SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ", STREAM_CACHE_POLICY" : "");
-
- if (!tgsi_text_translate(final_text, tokens, ARRAY_SIZE(tokens))) {
- assert(false);
- return NULL;
- }
-
- state.ir_type = PIPE_SHADER_IR_TGSI;
- state.prog = tokens;
-
- return ctx->create_compute_state(ctx, &state);
-}
-
/* Load samples from the image, and copy them to the same image. This looks like
* a no-op, but it's not. Loads use FMASK, while stores don't, so samples are
* reordered to match expanded FMASK.