intel/perf: use the new OA format for Gfx12.5+
author: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fri, 25 Jun 2021 10:08:47 +0000 (13:08 +0300)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 17 Nov 2022 12:57:06 +0000 (12:57 +0000)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18893>

src/intel/perf/gen_perf.py
src/intel/perf/intel_perf.c
src/intel/perf/intel_perf_setup.h
src/intel/vulkan/anv_perf.c

index 2b31505..dd158ff 100644 (file)
@@ -964,6 +964,8 @@ def main():
 
             if gen.chipset == "hsw":
                 c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
+            elif gen.chipset.startswith("acm"):
+                c("struct intel_perf_query_info *query = xehp_query_alloc(perf, %u);\n" % len(counters))
             else:
                 c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
             c("\n")
index 07e6c0f..2ac9687 100644 (file)
@@ -1108,6 +1108,66 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
    result->reports_accumulated++;
 
    switch (query->oa_format) {
+   case I915_OA_FORMAT_A24u40_A14u32_B8_C8:
+      result->accumulator[query->gpu_time_offset] =
+         intel_perf_report_timestamp(query, end) -
+         intel_perf_report_timestamp(query, start);
+
+      accumulate_uint32(start + 3, end + 3,
+                        result->accumulator + query->gpu_clock_offset); /* clock */
+
+      /* A0-A3 counters are 32bits */
+      for (i = 0; i < 4; i++) {
+         accumulate_uint32(start + 4 + i, end + 4 + i,
+                           result->accumulator + query->a_offset + i);
+      }
+
+      /* A4-A23 counters are 40bits */
+      for (i = 4; i < 24; i++) {
+         accumulate_uint40(i, start, end,
+                           result->accumulator + query->a_offset + i);
+      }
+
+      /* A24-27 counters are 32bits */
+      for (i = 0; i < 4; i++) {
+         accumulate_uint32(start + 28 + i, end + 28 + i,
+                           result->accumulator + query->a_offset + 24 + i);
+      }
+
+      /* A28-31 counters are 40bits */
+      for (i = 28; i < 32; i++) {
+         accumulate_uint40(i, start, end,
+                           result->accumulator + query->a_offset + i);
+      }
+
+      /* A32-35 counters are 32bits */
+      for (i = 0; i < 4; i++) {
+         accumulate_uint32(start + 36 + i, end + 36 + i,
+                           result->accumulator + query->a_offset + 32 + i);
+      }
+
+      if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
+          !query->perf->sys_vars.query_mode) {
+         /* A36-37 counters are 32bits */
+         accumulate_uint32(start + 40, end + 40,
+                           result->accumulator + query->a_offset + 36);
+         accumulate_uint32(start + 46, end + 46,
+                           result->accumulator + query->a_offset + 37);
+
+         /* 8x 32bit B counters */
+         for (i = 0; i < 8; i++) {
+            accumulate_uint32(start + 48 + i, end + 48 + i,
+                              result->accumulator + query->b_offset + i);
+         }
+
+         /* 8x 32bit C counters... */
+         for (i = 0; i < 8; i++) {
+            accumulate_uint32(start + 56 + i, end + 56 + i,
+                              result->accumulator + query->c_offset + i);
+         }
+      }
+      break;
+
    case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
       result->accumulator[query->gpu_time_offset] =
          intel_perf_report_timestamp(query, end) -
index a5a97df..cfa5a3c 100644 (file)
@@ -72,6 +72,21 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
    return query;
 }
 
+static struct intel_perf_query_info *
+xehp_query_alloc(struct intel_perf_config *perf, int ncounters)
+{ /* Allocate a query with ncounters counters and lay out its accumulator slots for the A24u40_A14u32_B8_C8 OA report format. */
+   struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters); /* NOTE(review): result is dereferenced without a NULL check; confirm intel_query_alloc cannot fail */
+   query->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8; /* format used by the accumulate switch to decode reports */
+   query->gpu_time_offset = 0; /* slot 0: timestamp delta between the end and start reports */
+   query->gpu_clock_offset = query->gpu_time_offset + 1; /* slot 1: clock delta (report dword 3) */
+   query->a_offset = query->gpu_clock_offset + 1; /* A counters start at slot 2 */
+   query->b_offset = query->a_offset + 38; /* 38 A slots: 24 x 40-bit + 14 x 32-bit, per the format name */
+   query->c_offset = query->b_offset + 8; /* 8 B counters */
+   query->perfcnt_offset = query->c_offset + 8; /* 8 C counters */
+   query->rpstat_offset = query->perfcnt_offset + 2; /* 2 slots are left for perfcnt; their contents are not shown in this change */
+   return query; /* caller gets the configured query; ownership is presumably as for intel_query_alloc -- TODO confirm */
+}
+
 struct intel_perf_query_counter_data {
    uint32_t name_idx;
    uint32_t desc_idx;
index 3e970b2..d6a82be 100644 (file)
@@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
    properties[p++] = metric_id;
 
    properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
-   properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
+   properties[p++] =
+      device->info->verx10 >= 125 ?
+      I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
+      I915_OA_FORMAT_A32u40_A4u32_B8_C8;
 
    properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
    properties[p++] = 31; /* slowest sampling period */