r300: cycles estimate for shader-db
author Pavel Ondračka <pavel.ondracka@gmail.com>
Fri, 14 Jul 2023 08:05:35 +0000 (10:05 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 20 Jul 2023 06:37:10 +0000 (06:37 +0000)
The estimate accounts for:
- macro MAD in vs
- NOPs needed before presubtract
- texture scheduling and proper texture semaphore usage

The docs don't mention any other sources of extra cycles, so otherwise
we assume 1 instruction = 1 cycle.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7573
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24152>

src/gallium/drivers/r300/compiler/radeon_compiler.c
src/gallium/drivers/r300/compiler/radeon_compiler.h

index 44a63f3..6f33bbe 100644 (file)
@@ -357,15 +357,24 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
 {
        struct rc_instruction * tmp;
        memset(s, 0, sizeof(*s));
+       unsigned ip = 0;
+       unsigned last_begintex = 0;
 
        for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
-                                                       tmp = tmp->Next){
+                                                       tmp = tmp->Next, ip++){
                const struct rc_opcode_info * info;
                rc_for_all_reads_mask(tmp, reg_count_callback, s);
                if (tmp->Type == RC_INSTRUCTION_NORMAL) {
                        info = rc_get_opcode_info(tmp->U.I.Opcode);
-                       if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+                       if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
+                               /* The R5xx docs mention ~30 cycles in section 8.3.1 */
+                               s->num_cycles += 30;
+                               last_begintex = ip;
                                continue;
+                       }
+                       if (info->Opcode == RC_OPCODE_MAD &&
+                               rc_inst_has_three_diff_temp_srcs(tmp))
+                               s->num_cycles++;
                } else {
                        if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
                                s->num_presub_ops++;
@@ -385,6 +394,13 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
                                tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
                                s->num_omod_ops++;
                        }
+                       if (tmp->U.P.Nop)
+                               s->num_cycles++;
+                       /* SemWait has effect only on R500, the more instructions we can put
+                        * between the tex block and the first texture semaphore, the better.
+                        */
+                       if (tmp->U.P.SemWait && c->is_r500)
+                               s->num_cycles -= ip - last_begintex;
                        info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
                }
                if (info->IsFlowControl) {
@@ -400,6 +416,7 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
                if (info->HasTexture)
                        s->num_tex_insts++;
                s->num_insts++;
+               s->num_cycles++;
        }
        /* Increment here because the reg_count_callback store the max
         * temporary reg index in s->nun_temp_regs. */
@@ -416,11 +433,14 @@ static void print_stats(struct radeon_compiler * c)
         * only the FS has, because shader-db's report.py wants all shaders to
         * have the same set.
         */
-       util_debug_message(c->debug, SHADER_INFO, "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, %u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits",
+       util_debug_message(c->debug, SHADER_INFO,
+                          "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol,"
+                          "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
                           c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
                           s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
                           s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
-                          s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals);
+                          s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
+                          s.num_cycles);
 }
 
 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
index cbedabb..0e4321f 100644 (file)
@@ -147,6 +147,7 @@ struct radeon_compiler_pass {
 };
 
 struct rc_program_stats {
+       unsigned num_cycles;
        unsigned num_consts;
        unsigned num_insts;
        unsigned num_fc_insts;