From 66045acdf9969e1fc1becb033287b5c52342207b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 10 Mar 2022 12:18:51 +0200 Subject: [PATCH] intel/perf: add max vfuncs New counters will use those from inside their read function to generate percentage numbers. v2: Forgot to update Iris (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Ian Romanick (v1) Part-of: --- src/gallium/drivers/iris/iris_monitor.c | 27 +++-- src/gallium/drivers/iris/iris_performance_query.c | 14 ++- src/intel/perf/gen_perf.py | 119 +++++++++++++++------- src/intel/perf/intel_perf.h | 29 ++++-- 4 files changed, 138 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c index 54d2e66..f24db9b 100644 --- a/src/gallium/drivers/iris/iris_monitor.c +++ b/src/gallium/drivers/iris/iris_monitor.c @@ -42,8 +42,8 @@ int iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) { - const struct iris_screen *screen = (struct iris_screen *)pscreen; - const struct intel_perf_config *perf_cfg = screen->perf_cfg; + struct iris_screen *screen = (struct iris_screen *)pscreen; + struct intel_perf_config *perf_cfg = screen->perf_cfg; assert(perf_cfg); if (!perf_cfg) return 0; @@ -54,7 +54,12 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index, } struct intel_perf_query_counter_info *counter_info = &perf_cfg->counter_infos[index]; + struct intel_perf_query_info *query_info = + &perf_cfg->queries[intel_perf_query_counter_info_first_query(counter_info)]; struct intel_perf_query_counter *counter = counter_info->counter; + struct intel_perf_query_result results; + + intel_perf_query_result_clear(&results); info->group_id = counter_info->location.group_idx; info->name = counter->name; @@ -66,19 +71,27 @@ iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index, info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; switch (counter->data_type) { case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: - case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: + case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: { info->type = PIPE_DRIVER_QUERY_TYPE_UINT; - assert(counter->raw_max <= UINT32_MAX); - info->max_value.u32 = (uint32_t)counter->raw_max; + uint64_t val = + counter->oa_counter_max_uint64 ? + counter->oa_counter_max_uint64(perf_cfg, query_info, &results) : 0; + assert(val <= UINT32_MAX); + info->max_value.u32 = (uint32_t)val; break; + } case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; - info->max_value.u64 = counter->raw_max; + info->max_value.u64 = + counter->oa_counter_max_uint64 ? + counter->oa_counter_max_uint64(perf_cfg, query_info, &results) : 0; break; case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT; - info->max_value.f = counter->raw_max; + info->max_value.f = + counter->oa_counter_max_float ? + counter->oa_counter_max_float(perf_cfg, query_info, &results) : 0.0f; break; default: assert(false); diff --git a/src/gallium/drivers/iris/iris_performance_query.c b/src/gallium/drivers/iris/iris_performance_query.c index d4a1d46..5f19878 100644 --- a/src/gallium/drivers/iris/iris_performance_query.c +++ b/src/gallium/drivers/iris/iris_performance_query.c @@ -176,6 +176,9 @@ iris_get_perf_counter_info(struct pipe_context *pipe, const struct intel_perf_query_info *info = &perf_cfg->queries[query_index]; const struct intel_perf_query_counter *counter = &info->counters[counter_index]; + struct intel_perf_query_result results; + + intel_perf_query_result_clear(&results); *name = counter->name; *desc = counter->desc; @@ -183,7 +186,16 @@ iris_get_perf_counter_info(struct pipe_context *pipe, *data_size = intel_perf_query_counter_get_size(counter); *type_enum = counter->type; *data_type_enum = counter->data_type; - *raw_max = counter->raw_max; + + if (counter->oa_counter_max_uint64) { + if (counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_FLOAT || + counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE) + *raw_max = counter->oa_counter_max_float(perf_cfg, info, &results); + else + *raw_max = counter->oa_counter_max_uint64(perf_cfg, info, &results); + } else { + *raw_max = 0; + } } static void diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py index 36b81da..5ff0245 100644 --- a/src/intel/perf/gen_perf.py +++ b/src/intel/perf/gen_perf.py @@ -355,14 +355,14 @@ def output_counter_read(gen, set, counter): def output_counter_max(gen, set, counter): max_eq = counter.get('max_equation') - if not counter.has_max_func(): + if not counter.has_custom_max_func(): return c("\n") c("/* {0} :: {1} */".format(set.name, counter.get('name'))) if counter.max_hash in hashed_funcs: - c("#define %s \\" % counter.max_sym()) + c("#define %s \\" % counter.max_sym) c_indent(3) c("%s" % hashed_funcs[counter.max_hash]) c_outdent(3) @@ -372,14 +372,18 @@ def output_counter_max(gen, set, counter): ret_type = "uint64_t" c("static " + ret_type) - c(counter.max_sym() + "(struct intel_perf_config *perf)\n") + c(counter.max_sym + "(struct intel_perf_config *perf,\n") + c_indent(len(counter.read_sym) + 1) + c("const struct intel_perf_query_info *query,\n") + c("const struct intel_perf_query_result *results)\n") + c_outdent(len(counter.read_sym) + 1) c("{") c_indent(3) output_rpn_equation_code(set, counter, max_eq) c_outdent(3) c("}") - hashed_funcs[counter.max_hash] = counter.max_sym() + hashed_funcs[counter.max_hash] = counter.max_sym c_type_sizes = { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 } @@ -515,10 +519,17 @@ def output_counter_report(set, counter, counter_to_idx, current_offset): current_offset = pot_align(current_offset, sizeof(c_type)) - c("intel_perf_query_add_counter(query, " + idx + ", " + - str(current_offset) + ", " + - set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" + - set.read_funcs[counter.get('symbol_name')] + ");\n") + if data_type == 'uint64': + c("intel_perf_query_add_counter_uint64(query, " + idx + ", " + + str(current_offset) + ", " + + set.max_funcs[counter.get('symbol_name')] + "," + + set.read_funcs[counter.get('symbol_name')] + ");\n") + else: + c("intel_perf_query_add_counter_float(query, " + idx + ", " + + str(current_offset) + ", " + + set.max_funcs[counter.get('symbol_name')] + "," + + set.read_funcs[counter.get('symbol_name')] + ");\n") + if availability: c_outdent(3); @@ -607,6 +618,7 @@ class Counter: self.read_sym = "{0}__{1}__{2}__read".format(self.set.gen.chipset, self.set.underscore_name, self.xml.get('underscore_name')) + self.max_sym = self.build_max_sym() def get(self, prop): return self.xml.get(prop) @@ -632,45 +644,44 @@ class Counter: if max_eq: self.max_hash = ' '.join(map(replace_token, max_eq.split())) - def has_max_func(self): + def has_custom_max_func(self): max_eq = self.xml.get('max_equation') if not max_eq: return False try: val = float(max_eq) - return False + if val == 100: + return False except ValueError: pass for token in max_eq.split(): - if token[0] == '$' and resolve_variable(token, self.set, False) == None: + if token[0] == '$' and resolve_variable(token, self.set, True) == None: + print("unresolved token " + token) return False return True - def max_sym(self): - assert self.has_max_func() - return "{0}__{1}__{2}__max".format(self.set.gen.chipset, - self.set.underscore_name, - self.xml.get('underscore_name')) - - def max_value(self): + def build_max_sym(self): max_eq = self.xml.get('max_equation') if not max_eq: - return "0 /* undefined */" + return "NULL" try: - return "{0}".format(float(max_eq)) + val = float(max_eq) + if val == 100: + if self.xml.get('data_type') == 'uint64': + return "percentage_max_uint64" + else: + return "percentage_max_float" except ValueError: pass - for token in max_eq.split(): - if token[0] == '$' and resolve_variable(token, self.set, False) == None: - return "0 /* unsupported (varies over time) */" + assert self.has_custom_max_func() + return "{0}__{1}__{2}__max".format(self.set.gen.chipset, + self.set.underscore_name, + self.xml.get('underscore_name')) - return "{0}__{1}__{2}__max(perf)".format(self.set.gen.chipset, - self.set.underscore_name, - self.xml.get('underscore_name')) # Wraps a element from the oa-*.xml files. class Set: @@ -679,7 +690,7 @@ class Set: self.xml = xml self.counter_vars = {} - self.max_values = {} + self.max_funcs = {} self.read_funcs = {} xml_counters = self.xml.findall("counter") @@ -689,7 +700,7 @@ class Set: self.counters.append(counter) self.counter_vars['$' + counter.get('symbol_name')] = counter self.read_funcs[counter.get('symbol_name')] = counter.read_sym - self.max_values[counter.get('symbol_name')] = counter.max_value() + self.max_funcs[counter.get('symbol_name')] = counter.max_sym for counter in self.counters: counter.compute_hashes() @@ -856,13 +867,11 @@ def main(): c("};\n\n") c(textwrap.dedent("""\ - typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf, - const struct intel_perf_query_info *query, - const struct intel_perf_query_result *results); static void ATTRIBUTE_NOINLINE - intel_perf_query_add_counter(struct intel_perf_query_info *query, - int counter_idx, size_t offset, - uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64) + intel_perf_query_add_counter_uint64(struct intel_perf_query_info *query, + int counter_idx, size_t offset, + intel_counter_read_uint64_t oa_counter_max, + intel_counter_read_uint64_t oa_counter_read) { struct intel_perf_query_counter *dest = &query->counters[query->n_counters++]; const struct intel_perf_query_counter_data *counter = &counters[counter_idx]; @@ -871,13 +880,51 @@ def main(): dest->desc = &desc[counter->desc_idx]; dest->symbol_name = &symbol_name[counter->symbol_name_idx]; dest->category = &category[counter->category_idx]; - dest->raw_max = raw_max; dest->offset = offset; dest->type = counter->type; dest->data_type = counter->data_type; dest->units = counter->units; - dest->oa_counter_read_uint64 = oa_counter_read_uint64; + dest->oa_counter_max_uint64 = oa_counter_max; + dest->oa_counter_read_uint64 = oa_counter_read; + } + + static void ATTRIBUTE_NOINLINE + intel_perf_query_add_counter_float(struct intel_perf_query_info *query, + int counter_idx, size_t offset, + intel_counter_read_float_t oa_counter_max, + intel_counter_read_float_t oa_counter_read) + { + struct intel_perf_query_counter *dest = &query->counters[query->n_counters++]; + const struct intel_perf_query_counter_data *counter = &counters[counter_idx]; + + dest->name = &name[counter->name_idx]; + dest->desc = &desc[counter->desc_idx]; + dest->symbol_name = &symbol_name[counter->symbol_name_idx]; + dest->category = &category[counter->category_idx]; + + dest->offset = offset; + dest->type = counter->type; + dest->data_type = counter->data_type; + dest->units = counter->units; + dest->oa_counter_max_float = oa_counter_max; + dest->oa_counter_read_float = oa_counter_read; + } + + static float ATTRIBUTE_NOINLINE + percentage_max_float(struct intel_perf_config *perf, + const struct intel_perf_query_info *query, + const struct intel_perf_query_result *results) + { + return 100; + } + + static uint64_t ATTRIBUTE_NOINLINE + percentage_max_uint64(struct intel_perf_config *perf, + const struct intel_perf_query_info *query, + const struct intel_perf_query_result *results) + { + return 100; } """)) diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h index 3b0825e..6f26041 100644 --- a/src/intel/perf/intel_perf.h +++ b/src/intel/perf/intel_perf.h @@ -37,6 +37,7 @@ #include "compiler/glsl/list.h" #include "dev/intel_device_info.h" +#include "util/bitscan.h" #include "util/hash_table.h" #include "util/ralloc.h" @@ -178,6 +179,14 @@ struct intel_perf_query_result { bool query_disjoint; }; +typedef uint64_t (*intel_counter_read_uint64_t)(struct intel_perf_config *perf, + const struct intel_perf_query_info *query, + const struct intel_perf_query_result *results); + +typedef float (*intel_counter_read_float_t)(struct intel_perf_config *perf, + const struct intel_perf_query_info *query, + const struct intel_perf_query_result *results); + struct intel_perf_query_counter { const char *name; const char *desc; @@ -186,16 +195,16 @@ struct intel_perf_query_counter { enum intel_perf_counter_type type; enum intel_perf_counter_data_type data_type; enum intel_perf_counter_units units; - uint64_t raw_max; size_t offset; union { - uint64_t (*oa_counter_read_uint64)(struct intel_perf_config *perf, - const struct intel_perf_query_info *query, - const struct intel_perf_query_result *results); - float (*oa_counter_read_float)(struct intel_perf_config *perf, - const struct intel_perf_query_info *query, - const struct intel_perf_query_result *results); + intel_counter_read_uint64_t oa_counter_max_uint64; + intel_counter_read_float_t oa_counter_max_float; + }; + + union { + intel_counter_read_uint64_t oa_counter_read_uint64; + intel_counter_read_float_t oa_counter_read_float; struct intel_pipeline_stat pipeline_stat; }; }; @@ -429,6 +438,12 @@ uint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int const struct intel_perf_registers *config, const char *guid); +static inline unsigned +intel_perf_query_counter_info_first_query(const struct intel_perf_query_counter_info *counter_info) +{ + return ffsll(counter_info->query_mask); +} + /** Read the slice/unslice frequency from 2 OA reports and store then into * result. */ -- 2.7.4