From: Pierre-Eric Pelloux-Prayer Date: Thu, 23 Jul 2020 08:29:14 +0000 (+0200) Subject: radeonsi: introduce SI_RESOURCE_FLAG_INTERNAL / RADEON_FLAG_DRIVER_INTERNAL X-Git-Tag: upstream/21.0.0~5101 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5e4aecec93204c832dc7756286f79fd8f1e4f8d2;p=platform%2Fupstream%2Fmesa.git radeonsi: introduce SI_RESOURCE_FLAG_INTERNAL / RADEON_FLAG_DRIVER_INTERNAL Tag allocations as driver internal. Some of these allocations will need to be doubled to handle TMZ (one secure bo, one normal bo) but these allocations shouldn't switch the winsys in "the app is using TMZ". Reviewed-by: Marek Olšák Part-of: --- diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index f923779..938e36c 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -69,6 +69,7 @@ enum radeon_bo_flag RADEON_FLAG_32BIT = (1 << 6), RADEON_FLAG_ENCRYPTED = (1 << 7), RADEON_FLAG_UNCACHED = (1 << 8), /* only gfx9 and newer */ + RADEON_FLAG_DRIVER_INTERNAL = (1 << 9), }; enum radeon_dependency_flag @@ -834,7 +835,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo /* Unsupported flags: NO_SUBALLOC, SPARSE. */ if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_UNCACHED | - RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) + RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT | + RADEON_FLAG_DRIVER_INTERNAL)) return -1; switch (domain) { diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 4e77e61..7103772 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1102,7 +1102,7 @@ resolve_to_temp: templ.usage = PIPE_USAGE_DEFAULT; templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE | SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(src->surface.micro_tile_mode) | - SI_RESOURCE_FLAG_DISABLE_DCC; + SI_RESOURCE_FLAG_DISABLE_DCC | SI_RESOURCE_FLAG_DRIVER_INTERNAL; /* The src and dst microtile modes must be the same. */ if (sctx->chip_class <= GFX8 && src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 7d6c4d3..2318ecf 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -182,6 +182,9 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, if (res->b.b.flags & SI_RESOURCE_FLAG_32BIT) res->flags |= RADEON_FLAG_32BIT; + if (res->b.b.flags & SI_RESOURCE_FLAG_DRIVER_INTERNAL) + res->flags |= RADEON_FLAG_DRIVER_INTERNAL; + /* For higher throughput and lower latency over PCIe assuming sequential access. * Only CP DMA, SDMA, and optimized compute benefit from this. * GFX8 and older don't support RADEON_FLAG_UNCACHED. @@ -479,7 +482,8 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resour struct si_resource *staging; assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); - staging = si_aligned_buffer_create(ctx->screen, SI_RESOURCE_FLAG_UNCACHED, + staging = si_aligned_buffer_create(ctx->screen, + SI_RESOURCE_FLAG_UNCACHED | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_STAGING, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), 256); if (staging) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 88d99ae..2830fb3 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -423,7 +423,9 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s si_resource_reference(&sctx->compute_scratch_buffer, NULL); sctx->compute_scratch_buffer = - si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + si_aligned_buffer_create(&sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, scratch_needed, sctx->screen->info.pte_fragment_size); if (!sctx->compute_scratch_buffer) diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 6cb461f..f5f767b 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -912,7 +912,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) unsigned num_oa_counters = VERTEX_COUNTER_GDS_MODE == 2 ? 2 : 0; if (gds_size) { - sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS, 0); + sctx->gds = ws->buffer_create(ws, gds_size, 4, RADEON_DOMAIN_GDS, + RADEON_FLAG_DRIVER_INTERNAL); if (!sctx->gds) return false; @@ -920,7 +921,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) } if (num_oa_counters) { assert(gds_size); - sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA, 0); + sctx->gds_oa = ws->buffer_create(ws, num_oa_counters, 1, RADEON_DOMAIN_OA, + RADEON_FLAG_DRIVER_INTERNAL); if (!sctx->gds_oa) return false; @@ -935,7 +937,8 @@ static bool si_initialize_prim_discard_cmdbuf(struct si_context *sctx) if (!sctx->index_ring) { sctx->index_ring = si_aligned_buffer_create( - sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, sctx->index_ring_size_per_ib * 2, sctx->screen->info.pte_fragment_size); if (!sctx->index_ring) return false; diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index e2fc6f3..133b451 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -265,7 +265,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns */ if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { si_resource_reference(&sctx->scratch_buffer, NULL); - sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, + sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, scratch_size, 256); if (!sctx->scratch_buffer) return; diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index bc8f89c..5df3b76 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -150,7 +150,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) sctx->screen->debug_flags & DBG(SHADOW_REGS)) { sctx->shadowed_regs = si_aligned_buffer_create(sctx->b.screen, - SI_RESOURCE_FLAG_UNMAPPABLE, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, SI_SHADOWED_REG_BUFFER_SIZE, 4096); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 6a50dca..6a92e98 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -306,8 +306,8 @@ void si_allocate_gds(struct si_context *sctx) /* 4 streamout GDS counters. * We need 256B (64 dw) of GDS, otherwise streamout hangs. */ - sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, 0); - sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, 0); + sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, RADEON_FLAG_DRIVER_INTERNAL); + sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, RADEON_FLAG_DRIVER_INTERNAL); assert(sctx->gds && sctx->gds_oa); si_add_gds_to_buffer_list(sctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1a429d1..a7bef27 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -611,7 +611,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign if (sctx->chip_class >= GFX9 || si_compute_prim_discard_enabled(sctx)) { sctx->wait_mem_scratch = - si_aligned_buffer_create(screen, SI_RESOURCE_FLAG_UNMAPPABLE, + si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, 8, sscreen->info.tcc_cache_line_size); if (!sctx->wait_mem_scratch) @@ -622,7 +623,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ if (sctx->chip_class == GFX7) { sctx->null_const_buf.buffer = - pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, 16, + pipe_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, 16, sctx->screen->info.tcc_cache_line_size); if (!sctx->null_const_buf.buffer) goto fail; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 33ac498..de88869 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -123,6 +123,7 @@ #define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \ (((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3) #define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12) +#define SI_RESOURCE_FLAG_DRIVER_INTERNAL (PIPE_RESOURCE_FLAG_DRV_PRIV << 13) enum si_clear_code { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 14803b3..8e688cd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -886,7 +886,9 @@ bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader si_resource_reference(&shader->bo, NULL); shader->bo = si_aligned_buffer_create( - &sscreen->b, sscreen->info.cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY, + &sscreen->b, + (sscreen->info.cpdma_prefetch_writes_memory ? + 0 : SI_RESOURCE_FLAG_READ_ONLY) | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_IMMUTABLE, align(binary.rx_size, SI_CPDMA_ALIGNMENT), 256); if (!shader->bo) return false; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e75a401..8e10c9a 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3400,7 +3400,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (update_esgs) { pipe_resource_reference(&sctx->esgs_ring, NULL); sctx->esgs_ring = - pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + pipe_aligned_buffer_create(sctx->b.screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, esgs_ring_size, sctx->screen->info.pte_fragment_size); if (!sctx->esgs_ring) return false; @@ -3409,7 +3411,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (update_gsvs) { pipe_resource_reference(&sctx->gsvs_ring, NULL); sctx->gsvs_ring = - pipe_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, + pipe_aligned_buffer_create(sctx->b.screen, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, gsvs_ring_size, sctx->screen->info.pte_fragment_size); if (!sctx->gsvs_ring) return false; @@ -3655,7 +3659,9 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create( - &sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_needed_size, + &sctx->screen->b, + SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL, + PIPE_USAGE_DEFAULT, scratch_needed_size, sctx->screen->info.pte_fragment_size); if (!sctx->scratch_buffer) return false; @@ -3690,7 +3696,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) * receives the high 13 bits. */ sctx->tess_rings = pipe_aligned_buffer_create( - sctx->b.screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, + sctx->b.screen, SI_RESOURCE_FLAG_32BIT | SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, 1 << 19); if (!sctx->tess_rings) return; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index dda309a..b7f8590 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1165,10 +1165,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements; unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4); - tex->dcc_retile_buffer = si_aligned_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, + tex->dcc_retile_buffer = si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_DEFAULT, dcc_retile_map_size, sscreen->info.tcc_cache_line_size); - struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM, + struct si_resource *buf = si_aligned_buffer_create(screen, + SI_RESOURCE_FLAG_DRIVER_INTERNAL, PIPE_USAGE_STREAM, dcc_retile_map_size, sscreen->info.tcc_cache_line_size); void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_MAP_WRITE); @@ -1684,7 +1686,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou struct pipe_resource resource; struct si_texture *staging; unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; - unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR; + unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL; /* The pixel shader has a bad access pattern for linear textures. * If a pixel shader is used to blit to/from staging, don't disable caches.