From: Alyssa Rosenzweig Date: Sat, 11 Mar 2023 20:39:09 +0000 (-0500) Subject: agx: Add helper for calculating occupancy X-Git-Tag: upstream/23.3.3~10337 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e7139838754ee7dfc56bd22478f969b4ca5e9c8e;p=platform%2Fupstream%2Fmesa.git agx: Add helper for calculating occupancy Add information about the relationship between program register usage and program occupancy (the maximum number of threads that may execute concurrently on a single shader core). This table is derived from studying the maxTotalThreadsPerThreadgroup property in Metal while varying the register usage, something I blogged about a few years back. It's probably not 100% accurate and it hasn't been tested against hardware, but it matters "only" for performance (not correctness) so I'm not super stressed about the details. In the (near) future, RA will be able to make use of this information to know exactly when it can use more registers without hurting performance. In the present, it's just used for better shader-db statistics. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index cd4a543..f6550da 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1805,8 +1805,8 @@ agx_dump_stats(agx_context *ctx, unsigned size, char **out) agx_foreach_instr_global(ctx, I) nr_ins++; - /* TODO: Pipe through occupancy */ - unsigned nr_threads = 1; + unsigned nr_threads = + agx_occupancy_for_register_count(ctx->max_reg).max_threads; return asprintf(out, "%s shader: %u inst, %u bytes, %u halfregs, %u threads, " diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index 4251a4c..d04e551 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -802,6 +802,13 @@ bool agx_nir_lower_ubo(nir_shader *shader); bool agx_nir_lower_shared_bitsize(nir_shader *shader); bool agx_nir_lower_frag_sidefx(nir_shader *s); +struct agx_occupancy { + unsigned max_registers; + unsigned max_threads; +}; + +struct agx_occupancy agx_occupancy_for_register_count(unsigned halfregs); + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/asahi/compiler/agx_performance.c b/src/asahi/compiler/agx_performance.c new file mode 100644 index 0000000..e277540 --- /dev/null +++ b/src/asahi/compiler/agx_performance.c @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "agx_compiler.h" + +/* Table describing the relationship between register pressure and thread + * count. Each entry describes a maximum number of registers and the associated + * best-case thread count. + * + * Sorted in ascending order of maximum registers for easy lookup. 
+ */ +static const struct agx_occupancy occupancies[] = { + {104, 1024}, {112, 896}, {128, 832}, {136, 768}, {144, 704}, + {160, 640}, {184, 576}, {208, 512}, {232, 448}, {256, 384}, +}; +/* Return the first table entry with max_registers >= halfregs. */ +struct agx_occupancy +agx_occupancy_for_register_count(unsigned halfregs) +{ + for (unsigned i = 0; i < ARRAY_SIZE(occupancies); ++i) { + unsigned max = occupancies[i].max_registers; + assert((i == 0 || max > occupancies[i - 1].max_registers) && "ascending"); + /* Entries ascend, so the first fit has the smallest register limit. */ + if (halfregs <= max) + return occupancies[i]; + } + /* No entry fits: halfregs is above the last max_registers (256). */ + unreachable("Register count must be less than the maximum"); +} diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build index 844cd85..3dfa535 100644 --- a/src/asahi/compiler/meson.build +++ b/src/asahi/compiler/meson.build @@ -20,6 +20,7 @@ libasahi_agx_files = files( 'agx_lower_pseudo.c', 'agx_lower_uniform_sources.c', 'agx_pack.c', + 'agx_performance.c', 'agx_print.c', 'agx_ir.c', 'agx_opt_cse.c',