The gfx10 NGG streamout path is unused and unstable. Keep NGG streamout only for gfx11.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23687>
{
assert(!list_is_empty(&sctx->shader_query_buffers));
- struct gfx10_sh_query_buffer *qbuf =
- list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
+ struct gfx11_sh_query_buffer *qbuf =
+ list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
+ qbuf->head += sizeof(struct gfx11_sh_query_buffer_mem);
}
-static void gfx10_release_query_buffers(struct si_context *sctx,
- struct gfx10_sh_query_buffer *first,
- struct gfx10_sh_query_buffer *last)
+static void gfx11_release_query_buffers(struct si_context *sctx,
+ struct gfx11_sh_query_buffer *first,
+ struct gfx11_sh_query_buffer *last)
{
while (first) {
- struct gfx10_sh_query_buffer *qbuf = first;
+ struct gfx11_sh_query_buffer *qbuf = first;
if (first != last)
- first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
+ first = list_entry(qbuf->list.next, struct gfx11_sh_query_buffer, list);
else
first = NULL;
}
}
-static bool gfx10_alloc_query_buffer(struct si_context *sctx)
+static bool gfx11_alloc_query_buffer(struct si_context *sctx)
{
if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
return true;
- struct gfx10_sh_query_buffer *qbuf = NULL;
+ struct gfx11_sh_query_buffer *qbuf = NULL;
if (!list_is_empty(&sctx->shader_query_buffers)) {
- qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
- if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
+ qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
+ if (qbuf->head + sizeof(struct gfx11_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
goto success;
- qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
+ qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
if (!qbuf->refcount &&
!si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
}
if (!qbuf) {
- qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
+ qbuf = CALLOC_STRUCT(gfx11_sh_query_buffer);
if (unlikely(!qbuf))
return false;
struct si_screen *screen = sctx->screen;
unsigned buf_size =
- MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);
+ MAX2(sizeof(struct gfx11_sh_query_buffer_mem), screen->info.min_alloc_size);
qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
if (unlikely(!qbuf->buf)) {
FREE(qbuf);
PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
assert(results);
- for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;
+ for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx11_sh_query_buffer_mem); i < e;
++i) {
for (unsigned j = 0; j < 16; ++j)
results[32 * i + j] = (uint64_t)1 << 63;
struct pipe_shader_buffer sbuf;
sbuf.buffer = &qbuf->buf->b.b;
sbuf.buffer_offset = qbuf->head;
- sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
+ sbuf.buffer_size = sizeof(struct gfx11_sh_query_buffer_mem);
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);
return true;
}
-static void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
+static void gfx11_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
- gfx10_release_query_buffers(sctx, query->first, query->last);
+ struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
+ gfx11_release_query_buffers(sctx, query->first, query->last);
FREE(query);
}
-static bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
+static bool gfx11_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
- gfx10_release_query_buffers(sctx, query->first, query->last);
+ gfx11_release_query_buffers(sctx, query->first, query->last);
query->first = query->last = NULL;
- if (unlikely(!gfx10_alloc_query_buffer(sctx)))
+ if (unlikely(!gfx11_alloc_query_buffer(sctx)))
return false;
- query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
+ query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
query->first_begin = query->first->head;
sctx->num_active_shader_queries++;
return true;
}
-static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
+static bool gfx11_sh_query_end(struct si_context *sctx, struct si_query *rquery)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
if (unlikely(!query->first))
return false; /* earlier out of memory error */
- query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
+ query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
query->last_end = query->last->head;
/* Signal the fence of the previous chunk */
if (query->last_end != 0) {
uint64_t fence_va = query->last->buf->gpu_address;
- fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
- fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
+ fence_va += query->last_end - sizeof(struct gfx11_sh_query_buffer_mem);
+ fence_va += offsetof(struct gfx11_sh_query_buffer_mem, fence);
si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
0xffffffff, PIPE_QUERY_GPU_FINISHED);
return true;
}
-static void gfx10_sh_query_add_result(struct gfx10_sh_query *query,
- struct gfx10_sh_query_buffer_mem *qmem,
+static void gfx11_sh_query_add_result(struct gfx11_sh_query *query,
+ struct gfx11_sh_query_buffer_mem *qmem,
union pipe_query_result *result)
{
static const uint64_t mask = ((uint64_t)1 << 63) - 1;
}
}
-static bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
+static bool gfx11_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
union pipe_query_result *result)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
util_query_clear_result(result, query->b.type);
return false; /* earlier out of memory error */
assert(query->last);
- for (struct gfx10_sh_query_buffer *qbuf = query->last;;
- qbuf = list_entry(qbuf->list.prev, struct gfx10_sh_query_buffer, list)) {
+ for (struct gfx11_sh_query_buffer *qbuf = query->last;;
+ qbuf = list_entry(qbuf->list.prev, struct gfx11_sh_query_buffer, list)) {
unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
void *map;
results_end = query->last_end;
while (results_begin != results_end) {
- struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
+ struct gfx11_sh_query_buffer_mem *qmem = map + results_begin;
results_begin += sizeof(*qmem);
- gfx10_sh_query_add_result(query, qmem, result);
+ gfx11_sh_query_add_result(query, qmem, result);
}
if (qbuf == query->first)
return true;
}
-static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
+static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
enum pipe_query_flags flags,
enum pipe_query_value_type result_type,
int index, struct pipe_resource *resource,
unsigned offset)
{
- struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
+ struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
struct si_qbo_state saved_state = {};
struct pipe_resource *tmp_buffer = NULL;
unsigned tmp_buffer_offset = 0;
if (!sctx->sh_query_result_shader) {
- sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
+ sctx->sh_query_result_shader = gfx11_create_sh_query_result_cs(sctx);
if (!sctx->sh_query_result_shader)
return;
}
grid.grid[1] = 1;
grid.grid[2] = 1;
- struct gfx10_sh_query_buffer *qbuf = query->first;
+ struct gfx11_sh_query_buffer *qbuf = query->first;
for (;;) {
unsigned begin = qbuf == query->first ? query->first_begin : 0;
unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
ssbo[0].buffer_offset = begin;
ssbo[0].buffer_size = end - begin;
- consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
+ consts.result_count = (end - begin) / sizeof(struct gfx11_sh_query_buffer_mem);
consts.chain = 0;
if (qbuf != query->first)
consts.chain |= 1;
* serialized in the CP.
*/
va = qbuf->buf->gpu_address;
- va += end - sizeof(struct gfx10_sh_query_buffer_mem);
- va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
+ va += end - sizeof(struct gfx11_sh_query_buffer_mem);
+ va += offsetof(struct gfx11_sh_query_buffer_mem, fence);
si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
}
if (qbuf == query->last)
break;
- qbuf = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
+ qbuf = list_entry(qbuf->list.next, struct gfx11_sh_query_buffer, list);
}
si_restore_qbo_state(sctx, &saved_state);
pipe_resource_reference(&tmp_buffer, NULL);
}
-static const struct si_query_ops gfx10_sh_query_ops = {
- .destroy = gfx10_sh_query_destroy,
- .begin = gfx10_sh_query_begin,
- .end = gfx10_sh_query_end,
- .get_result = gfx10_sh_query_get_result,
- .get_result_resource = gfx10_sh_query_get_result_resource,
+static const struct si_query_ops gfx11_sh_query_ops = {
+ .destroy = gfx11_sh_query_destroy,
+ .begin = gfx11_sh_query_begin,
+ .end = gfx11_sh_query_end,
+ .get_result = gfx11_sh_query_get_result,
+ .get_result_resource = gfx11_sh_query_get_result_resource,
};
-struct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
+struct pipe_query *gfx11_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
unsigned index)
{
- struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
+ struct gfx11_sh_query *query = CALLOC_STRUCT(gfx11_sh_query);
if (unlikely(!query))
return NULL;
- query->b.ops = &gfx10_sh_query_ops;
+ query->b.ops = &gfx11_sh_query_ops;
query->b.type = query_type;
query->stream = index;
return (struct pipe_query *)query;
}
-void gfx10_init_query(struct si_context *sctx)
+void gfx11_init_query(struct si_context *sctx)
{
list_inithead(&sctx->shader_query_buffers);
sctx->atoms.s.shader_query.emit = emit_shader_query;
}
-void gfx10_destroy_query(struct si_context *sctx)
+void gfx11_destroy_query(struct si_context *sctx)
{
if (!sctx->shader_query_buffers.next)
return;
while (!list_is_empty(&sctx->shader_query_buffers)) {
- struct gfx10_sh_query_buffer *qbuf =
- list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
+ struct gfx11_sh_query_buffer *qbuf =
+ list_first_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
list_del(&qbuf->list);
assert(!qbuf->refcount);
files_libradeonsi = files(
'driinfo_radeonsi.h',
- 'gfx10_query.c',
'gfx10_shader_ngg.c',
+ 'gfx11_query.c',
'si_blit.c',
'si_buffer.c',
'si_build_pm4.h',
* idle when we leave the IB, otherwise another process
* might overwrite it while our shaders are busy.
*/
- if (sscreen->use_ngg_streamout) {
- if (ctx->gfx_level >= GFX11)
- wait_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
- else
- wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
- }
+ if (ctx->gfx_level >= GFX11)
+ wait_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
}
}
static void si_add_gds_to_buffer_list(struct si_context *sctx)
{
- if (sctx->screen->gds)
- sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->screen->gds, RADEON_USAGE_READWRITE, 0);
if (sctx->screen->gds_oa)
sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->screen->gds_oa, RADEON_USAGE_READWRITE, 0);
}
{
struct radeon_winsys *ws = sctx->ws;
- assert(sctx->screen->use_ngg_streamout);
+ assert(sctx->gfx_level >= GFX11);
if (sctx->screen->gds_oa)
return;
- assert(!sctx->screen->gds && !sctx->screen->gds_oa);
-
- /* Gfx11 only uses GDS OA, not GDS memory.
- * Gfx10 needs 256B (64 dw) of GDS, otherwise streamout hangs.
- */
+ /* Gfx11 only uses GDS OA, not GDS memory. */
simple_mtx_lock(&sctx->screen->gds_mutex);
if (!sctx->screen->gds_oa) {
sctx->screen->gds_oa = ws->buffer_create(ws, 1, 1, RADEON_DOMAIN_OA, RADEON_FLAG_DRIVER_INTERNAL);
assert(sctx->screen->gds_oa);
-
- if (sctx->gfx_level < GFX11) {
- sctx->screen->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, RADEON_FLAG_DRIVER_INTERNAL);
- assert(sctx->screen->gds);
- }
}
simple_mtx_unlock(&sctx->screen->gds_mutex);
si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
- if (!ctx->screen->use_ngg_streamout)
+ if (ctx->gfx_level < GFX11)
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
/* CLEAR_STATE disables all window rectangles. */
if (!has_clear_state || ctx->num_window_rectangles > 0)
unsigned stream = nir_intrinsic_stream_id(intrin);
unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
- offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
- offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
+ offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].generated_primitives) :
+ offsetof(struct gfx11_sh_query_buffer_mem, stream[stream].emitted_primitives);
nir_ssa_def *prim_count = intrin->src[0].ssa;
nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), prim_count,
si_release_all_descriptors(sctx);
if (sctx->gfx_level >= GFX10 && sctx->has_graphics)
- gfx10_destroy_query(sctx);
+ gfx11_destroy_query(sctx);
if (sctx->sqtt) {
struct si_screen *sscreen = sctx->screen;
/* Initialize graphics-only context functions. */
if (sctx->has_graphics) {
if (sctx->gfx_level >= GFX10)
- gfx10_init_query(sctx);
+ gfx11_init_query(sctx);
si_init_msaa_functions(sctx);
si_init_shader_functions(sctx);
si_init_state_functions(sctx);
simple_mtx_destroy(&sscreen->gpu_load_mutex);
simple_mtx_destroy(&sscreen->gds_mutex);
- radeon_bo_reference(sscreen->ws, &sscreen->gds, NULL);
radeon_bo_reference(sscreen->ws, &sscreen->gds_oa, NULL);
slab_destroy_parent(&sscreen->pool_transfers);
if (sscreen->info.gfx_level >= GFX11) {
sscreen->use_ngg = true;
- sscreen->use_ngg_streamout = true;
/* TODO: Disable for now. Investigate if it helps. */
sscreen->use_ngg_culling = (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL)) &&
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
sscreen->info.gfx_level >= GFX10 &&
(sscreen->info.family != CHIP_NAVI14 ||
sscreen->info.is_pro_graphics);
- sscreen->use_ngg_streamout = false;
sscreen->use_ngg_culling = sscreen->use_ngg &&
sscreen->info.max_render_backends >= 2 &&
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
bool dpbb_allowed;
bool use_ngg;
bool use_ngg_culling;
- bool use_ngg_streamout;
bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */
bool always_allow_dcc_stores;
/* NGG streamout. */
simple_mtx_t gds_mutex;
- struct pb_buffer *gds;
struct pb_buffer *gds_oa;
};
void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx);
void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array);
void *si_create_query_result_cs(struct si_context *sctx);
-void *gfx10_create_sh_query_result_cs(struct si_context *sctx);
+void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
-/* gfx10_query.c */
-void gfx10_init_query(struct si_context *sctx);
-void gfx10_destroy_query(struct si_context *sctx);
+/* gfx11_query.c */
+void gfx11_init_query(struct si_context *sctx);
+void gfx11_destroy_query(struct si_context *sctx);
/* si_test_image_copy_region.c */
void si_test_image_copy_region(struct si_screen *sscreen);
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
- if (ctx->screen->use_ngg_streamout && (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
- query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
- struct gfx10_sh_query *gfx10_query = (struct gfx10_sh_query *)query;
- struct gfx10_sh_query_buffer *qbuf, *first, *last;
+ if (ctx->gfx_level >= GFX11 && (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
+ struct gfx11_sh_query *gfx10_query = (struct gfx11_sh_query *)query;
+ struct gfx11_sh_query_buffer *qbuf, *first, *last;
op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
while (first) {
qbuf = first;
if (first != last)
- first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
+ first = list_entry(qbuf->list.next, struct gfx11_sh_query_buffer, list);
else
first = NULL;
unsigned begin = qbuf == gfx10_query->first ? gfx10_query->first_begin : 0;
unsigned end = qbuf == gfx10_query->last ? gfx10_query->last_end : qbuf->buf->b.b.width0;
- unsigned count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
+ unsigned count = (end - begin) / sizeof(struct gfx11_sh_query_buffer_mem);
do {
if (gfx10_query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
op |= PREDICATION_CONTINUE;
}
- results_base += sizeof(struct gfx10_sh_query_buffer_mem);
+ results_base += sizeof(struct gfx11_sh_query_buffer_mem);
} while (count--);
}
} else {
(query_type >= PIPE_QUERY_DRIVER_SPECIFIC))
return si_query_sw_create(query_type);
- if (sscreen->use_ngg_streamout &&
+ if (sscreen->info.gfx_level >= GFX11 &&
(query_type == PIPE_QUERY_PRIMITIVES_EMITTED ||
query_type == PIPE_QUERY_PRIMITIVES_GENERATED || query_type == PIPE_QUERY_SO_STATISTICS ||
query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE))
- return gfx10_sh_query_create(sscreen, query_type, index);
+ return gfx11_sh_query_create(sscreen, query_type, index);
return si_query_hw_create(sscreen, query_type, index);
}
* point into the ring, allowing an arbitrary number of queries to be active
* without additional GPU cost.
*/
-struct gfx10_sh_query_buffer {
+struct gfx11_sh_query_buffer {
struct list_head list;
struct si_resource *buf;
unsigned refcount;
* SET_PREDICATION packet, which also means that we're setting the high bit
* of all those values unconditionally.
*/
-struct gfx10_sh_query_buffer_mem {
+struct gfx11_sh_query_buffer_mem {
struct {
uint64_t generated_primitives_start_dummy;
uint64_t emitted_primitives_start_dummy;
uint32_t pad[31];
};
-struct gfx10_sh_query {
+struct gfx11_sh_query {
struct si_query b;
- struct gfx10_sh_query_buffer *first;
- struct gfx10_sh_query_buffer *last;
+ struct gfx11_sh_query_buffer *first;
+ struct gfx11_sh_query_buffer *last;
unsigned first_begin;
unsigned last_end;
unsigned stream;
};
-struct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
+struct pipe_query *gfx11_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
unsigned index);
/* Performance counters */
{
struct si_shader_selector *sel = shader->selector;
- if (sel->screen->use_ngg_streamout) {
+ if (shader->selector->screen->info.gfx_level >= GFX11) {
+ /* NGG streamout. */
if (sel->stage == MESA_SHADER_TESS_EVAL)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return;
options.passthrough = gfx10_is_ngg_passthrough(shader);
options.use_edgeflags = gfx10_edgeflags_have_effect(shader);
options.has_gen_prim_query = options.has_xfb_prim_query =
- sel->screen->use_ngg_streamout && !sel->info.base.vs.blit_sgprs_amd;
+ sel->screen->info.gfx_level >= GFX11 && !sel->info.base.vs.blit_sgprs_amd;
options.export_primitive_id = key->ge.mono.u.vs_export_prim_id;
options.instance_rate_inputs = instance_rate_inputs;
options.user_clip_plane_enable_mask = clip_plane_enable;
options.gs_out_vtx_bytes = sel->info.gsvs_vertex_size;
options.has_gen_prim_query = options.has_xfb_prim_query =
- sel->screen->use_ngg_streamout;
+ sel->screen->info.gfx_level >= GFX11;
NIR_PASS_V(nir, ac_nir_lower_ngg_gs, &options);
}
* 2: write next summary buffer
* 0.w = result_count
*/
-void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
+void *gfx11_create_sh_query_result_cs(struct si_context *sctx)
{
/* TEMP[0].x = accumulated result so far
* TEMP[0].y = result missing
else if (sscreen->info.gfx_level == GFX9)
rsrc2 |= S_00B12C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
- if (!sscreen->use_ngg_streamout && si_shader_uses_streamout(shader)) {
+ if (si_shader_uses_streamout(shader)) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
if (sel->stage <= MESA_SHADER_GEOMETRY &&
sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask ||
- sscreen->use_ngg_streamout) &&
+ sscreen->info.gfx_level >= GFX11) &&
((sel->stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
sel->stage == MESA_SHADER_TESS_EVAL || sel->stage == MESA_SHADER_GEOMETRY))
shader->key.ge.as_ngg = 1;
sctx->streamout.stride_in_dw = shader_with_so->info.base.xfb_stride;
/* GDS must be allocated when any GDS instructions are used, otherwise it hangs. */
- if (sctx->screen->use_ngg_streamout && shader_with_so->info.enabled_streamout_buffer_mask)
+ if (sctx->gfx_level >= GFX11 && shader_with_so->info.enabled_streamout_buffer_mask)
si_allocate_gds(sctx);
}
if (sctx->shader.gs.cso && sctx->shader.tes.cso && sctx->shader.gs.cso->tess_turns_off_ngg) {
new_ngg = false;
- } else if (!sctx->screen->use_ngg_streamout) {
+ } else if (sctx->gfx_level < GFX11) {
struct si_shader_selector *last = si_get_vs(sctx)->cso;
if ((last && last->info.enabled_streamout_buffer_mask) ||
*/
sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
- if (sctx->screen->use_ngg_streamout) {
- if (sctx->gfx_level >= GFX11) {
- sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
- } else {
- /* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
- }
+ if (sctx->gfx_level >= GFX11) {
+ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
/* Wait now. This is needed to make sure that GDS is not
* busy at the end of IBs.
/* Allocate space for the filled buffer size. */
struct si_streamout_target *t = sctx->streamout.targets[i];
if (!t->buf_filled_size) {
- unsigned buf_filled_size_size = sctx->screen->use_ngg_streamout ? 8 : 4;
+ unsigned buf_filled_size_size = sctx->gfx_level >= GFX11 ? 8 : 4;
u_suballocator_alloc(&sctx->allocator_zeroed_memory, buf_filled_size_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource **)&t->buf_filled_size);
struct pipe_shader_buffer sbuf;
sbuf.buffer = targets[i]->buffer;
- if (sctx->screen->use_ngg_streamout) {
+ if (sctx->gfx_level >= GFX11) {
sbuf.buffer_offset = targets[i]->buffer_offset;
sbuf.buffer_size = targets[i]->buffer_size;
} else {
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
struct si_streamout_target **t = sctx->streamout.targets;
- if (!sctx->screen->use_ngg_streamout)
+ if (sctx->gfx_level < GFX11)
si_flush_vgt_streamout(sctx);
for (unsigned i = 0; i < sctx->streamout.num_targets; i++) {
radeon_set_uconfig_reg(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0);
radeon_end();
}
- } else if (sctx->screen->use_ngg_streamout) {
- bool append = sctx->streamout.append_bitmask & (1 << i);
- uint64_t va = 0;
-
- if (append) {
- radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, t[i]->buf_filled_size,
- RADEON_USAGE_READ | RADEON_PRIO_SO_FILLED_SIZE);
-
- va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
- }
-
- radeon_begin(cs);
- radeon_emit(PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
- S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(1));
- radeon_emit(va);
- radeon_emit(va >> 32);
- radeon_emit(4 * i); /* destination in GDS */
- radeon_emit(0);
- radeon_emit(S_415_BYTE_COUNT_GFX9(4));
- radeon_end();
} else {
/* Legacy streamout.
*
/* Wait for streamout to finish before reading GDS_STRMOUT registers. */
sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
- } else if (!sctx->screen->use_ngg_streamout) {
+ } else {
si_flush_vgt_streamout(sctx);
}
COPY_DATA_REG, NULL,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
- } else if (sctx->screen->use_ngg_streamout) {
- /* TODO: PS_DONE doesn't ensure completion of VS if there are no PS waves. */
- si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_GDS,
- t[i]->buf_filled_size, va, EOP_DATA_GDS(i, 1), 0);
} else {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
static void si_emit_streamout_enable(struct si_context *sctx)
{
- assert(!sctx->screen->use_ngg_streamout);
+ assert(sctx->gfx_level < GFX11);
radeon_begin(&sctx->gfx_cs);
radeon_set_context_reg_seq(R_028B94_VGT_STRMOUT_CONFIG, 2);
sctx->streamout.enabled_mask | (sctx->streamout.enabled_mask << 4) |
(sctx->streamout.enabled_mask << 8) | (sctx->streamout.enabled_mask << 12);
- if (!sctx->screen->use_ngg_streamout &&
+ if (sctx->gfx_level < GFX11 &&
((old_strmout_en != si_get_strmout_en(sctx)) ||
(old_hw_enabled_mask != sctx->streamout.hw_enabled_mask)))
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
void si_update_prims_generated_query_state(struct si_context *sctx, unsigned type, int diff)
{
- if (!sctx->screen->use_ngg_streamout && type == PIPE_QUERY_PRIMITIVES_GENERATED) {
+ if (sctx->gfx_level < GFX11 && type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = si_get_strmout_en(sctx);
sctx->streamout.num_prims_gen_queries += diff;
sctx->b.set_stream_output_targets = si_set_streamout_targets;
sctx->atoms.s.streamout_begin.emit = si_emit_streamout_begin;
- if (!sctx->screen->use_ngg_streamout)
+ if (sctx->gfx_level < GFX11)
sctx->atoms.s.streamout_enable.emit = si_emit_streamout_enable;
}