intel/perf: Use a function to initialize perf counters
author Matt Turner <mattst88@gmail.com>
Thu, 3 Mar 2022 01:53:02 +0000 (17:53 -0800)
committer Marge Bot <emma+marge@anholt.net>
Mon, 7 Mar 2022 21:09:54 +0000 (21:09 +0000)
Specifically, mark the function with ATTRIBUTE_NOINLINE; otherwise the
compiler inlines it and code size actually increases slightly.

Cuts 505 KiB from iris_dri.so and libvulkan_intel.so.

   text    data     bss     dec     hex filename
1538720       0       0 1538720  177aa0 meson-generated_.._intel_perf_metrics.c.o (before)
 926811   43200       0  970011   ecd1b meson-generated_.._intel_perf_metrics.c.o (after)

   text    data     bss     dec     hex filename
14751700 365708  210004 15327412 e9e0b4 iris_dri.so (before)
14190852 408908  210004 14809764 e1faa4 iris_dri.so (after)

   text    data     bss     dec     hex filename
8744913  214264   22820 8981997  890ded libvulkan_intel.so (before)
8184097  257464   22820 8464381  8127fd libvulkan_intel.so (after)

Relocations increase because the counter initializations are moved from
code (in .text) to pointers (in .data) to .rodata, and those pointers
require relocations.

relinfo:
iris_dri.so (before): 15605 relocations, 15385 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users
iris_dri.so (after) : 17765 relocations, 17545 relative (98%), 452 PLT entries, 1 for local syms (0%), 0 users

libvulkan_intel.so (before):  8560 relocations, 4829 relative (56%), 355 PLT entries, 1 for local syms (0%), 0 users
libvulkan_intel.so (after) : 10720 relocations, 6989 relative (65%), 355 PLT entries, 1 for local syms (0%), 0 users

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15237>

src/intel/perf/gen_perf.py

index 6b0e164..f4e766f 100644 (file)
@@ -461,19 +461,13 @@ def output_counter_report(set, counter, counter_to_idx, current_offset):
     key = counter_key(counter)
     idx = str(counter_to_idx[key])
 
-    c("counter = &query->counters[query->n_counters++];\n")
-    c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n")
-    c("counter->name = counters[" + idx + "].name;\n")
-    c("counter->desc = counters[" + idx + "].desc;\n")
-    c("counter->symbol_name = counters[" + idx + "].symbol_name;\n")
-    c("counter->category = counters[" + idx + "].category;\n")
-    c("counter->type = counters[" + idx + "].type;\n")
-    c("counter->data_type = counters[" + idx + "].data_type;\n")
-    c("counter->units = counters[" + idx + "].units;\n")
-    c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n")
-
     current_offset = pot_align(current_offset, sizeof(c_type))
-    c("counter->offset = " + str(current_offset) + ";\n")
+
+    c("counter = &query->counters[query->n_counters++];\n")
+    c("intel_perf_query_add_counter(counter, " + idx + ", " +
+        str(current_offset) + ", " +
+        set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" +
+        set.read_funcs[counter.get('symbol_name')] + ");\n")
 
     if availability:
         c_outdent(3);
@@ -757,7 +751,32 @@ def main():
                     idx += 1
 
     c_outdent(3)
-    c("};\n")
+    c("};\n\n")
+
+    c(textwrap.dedent("""\
+        typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf,
+                                                 const struct intel_perf_query_info *query,
+                                                 const struct intel_perf_query_result *results);
+        static void ATTRIBUTE_NOINLINE
+        intel_perf_query_add_counter(struct intel_perf_query_counter *dest,
+                                     int counter_idx, size_t offset,
+                                     uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64)
+        {
+           const struct intel_perf_query_counter *counter = &counters[counter_idx];
+
+           dest->name = counter->name;
+           dest->desc = counter->desc;
+           dest->symbol_name = counter->symbol_name;
+           dest->category = counter->category;
+           dest->raw_max = raw_max;
+
+           dest->offset = offset;
+           dest->type = counter->type;
+           dest->data_type = counter->data_type;
+           dest->units = counter->units;
+           dest->oa_counter_read_uint64 = oa_counter_read_uint64;
+        }
+        """))
 
     # Print out all metric sets registration functions for each set in each
     # generation.