intel/perf: use a function to do common allocations
author Dave Airlie <airlied@redhat.com>
Thu, 20 Jan 2022 01:36:16 +0000 (11:36 +1000)
committer Marge Bot <emma+marge@anholt.net>
Thu, 20 Jan 2022 06:41:17 +0000 (06:41 +0000)
This cuts the compile time down for this file on my ryzen from
real 1m4.077s
to
real 0m30.827s

Reviewed-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14630>

src/intel/perf/gen_perf.py

index bdaeb68..41c4b3c 100644 (file)
@@ -691,7 +691,48 @@ def main():
         #define MIN(a, b) ((a < b) ? (a) : (b))
         #define MAX(a, b) ((a > b) ? (a) : (b))
 
-
+        static struct intel_perf_query_info *
+        intel_query_alloc(struct intel_perf_config *perf, int ncounters)
+        {
+           struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);
+           query->perf = perf;
+           query->kind = INTEL_PERF_QUERY_TYPE_OA;
+           query->n_counters = 0;
+           query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
+           query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters);
+           return query;
+        }
+
+        static struct intel_perf_query_info *
+        hsw_query_alloc(struct intel_perf_config *perf, int ncounters)
+        {
+           struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
+           query->oa_format = I915_OA_FORMAT_A45_B8_C8;
+           /* Accumulation buffer offsets... */
+           query->gpu_time_offset = 0;
+           query->a_offset = query->gpu_time_offset + 1;
+           query->b_offset = query->a_offset + 45;
+           query->c_offset = query->b_offset + 8;
+           query->perfcnt_offset = query->c_offset + 8;
+           query->rpstat_offset = query->perfcnt_offset + 2;
+           return query;
+        }
+
+        static struct intel_perf_query_info *
+        bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
+        {
+           struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
+           query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+           /* Accumulation buffer offsets... */
+           query->gpu_time_offset = 0;
+           query->gpu_clock_offset = query->gpu_time_offset + 1;
+           query->a_offset = query->gpu_clock_offset + 1;
+           query->b_offset = query->a_offset + 36;
+           query->c_offset = query->b_offset + 8;
+           query->perfcnt_offset = query->c_offset + 8;
+           query->rpstat_offset = query->perfcnt_offset + 2;
+           return query;
+        }
         """))
 
     # Print out all equation functions.
@@ -713,43 +754,15 @@ def main():
             c("{\n")
             c_indent(3)
 
-            c("struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);\n")
+            if gen.chipset == "hsw":
+                c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
+            else:
+                c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
             c("\n")
-            c("query->perf = perf;\n")
-            c("query->kind = INTEL_PERF_QUERY_TYPE_OA;\n")
             c("query->name = \"" + set.name + "\";\n")
             c("query->symbol_name = \"" + set.symbol_name + "\";\n")
             c("query->guid = \"" + set.hw_config_guid + "\";\n")
 
-            c("query->counters = rzalloc_array(query, struct intel_perf_query_counter, %u);" % len(counters))
-            c("query->n_counters = 0;")
-            c("query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */")
-
-            if gen.chipset == "hsw":
-                c(textwrap.dedent("""\
-                    query->oa_format = I915_OA_FORMAT_A45_B8_C8;
-                    /* Accumulation buffer offsets... */
-                    query->gpu_time_offset = 0;
-                    query->a_offset = query->gpu_time_offset + 1;
-                    query->b_offset = query->a_offset + 45;
-                    query->c_offset = query->b_offset + 8;
-                    query->perfcnt_offset = query->c_offset + 8;
-                    query->rpstat_offset = query->perfcnt_offset + 2;
-                """))
-            else:
-                c(textwrap.dedent("""\
-                    query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
-                    /* Accumulation buffer offsets... */
-                    query->gpu_time_offset = 0;
-                    query->gpu_clock_offset = query->gpu_time_offset + 1;
-                    query->a_offset = query->gpu_clock_offset + 1;
-                    query->b_offset = query->a_offset + 36;
-                    query->c_offset = query->b_offset + 8;
-                    query->perfcnt_offset = query->c_offset + 8;
-                    query->rpstat_offset = query->perfcnt_offset + 2;
-                """))
-
-
             c("\n")
             c("struct intel_perf_query_counter *counter = query->counters;\n")