drm/i915/pmu: Prepare for multi-tile non-engine counters
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>
Fri, 19 May 2023 15:49:45 +0000 (08:49 -0700)
committerUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Mon, 22 May 2023 18:07:52 +0000 (11:07 -0700)
Reserve some bits in the counter config namespace which will carry the
tile id and prepare the code to handle this.

No per tile counters have been added yet.

v2:
- Fix checkpatch issues
- Use 4 bits for gt id in non-engine counters. Drop FIXME.
- Set MAX GTs to 4. Drop FIXME.

v3: (Ashutosh, Tvrtko)
- Drop BUG_ON that would never fire
- Make enable u64
- Pull in some code from next patch

v4: Set I915_PMU_MAX_GTS to 2 (Tvrtko)

v5: s/u64/u32 where needed (Ashutosh)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-7-umesh.nerlige.ramappa@intel.com
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
include/uapi/drm/i915_drm.h

index ecb57a9..5cfc322 100644 (file)
@@ -56,11 +56,21 @@ static bool is_engine_config(const u64 config)
        return config < __I915_PMU_OTHER(0);
 }
 
+static unsigned int config_gt_id(const u64 config)
+{
+       return config >> __I915_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+       return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
+}
+
 static unsigned int other_bit(const u64 config)
 {
        unsigned int val;
 
-       switch (config) {
+       switch (config_counter(config)) {
        case I915_PMU_ACTUAL_FREQUENCY:
                val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
                break;
@@ -78,7 +88,9 @@ static unsigned int other_bit(const u64 config)
                return -1;
        }
 
-       return I915_ENGINE_SAMPLE_COUNT + val;
+       return I915_ENGINE_SAMPLE_COUNT +
+              config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
+              val;
 }
 
 static unsigned int config_bit(const u64 config)
@@ -115,6 +127,18 @@ static unsigned int event_bit(struct perf_event *event)
        return config_bit(event->attr.config);
 }
 
+static u32 frequency_enabled_mask(void)
+{
+       unsigned int i;
+       u32 mask = 0;
+
+       for (i = 0; i < I915_PMU_MAX_GTS; i++)
+               mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
+                       config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
+
+       return mask;
+}
+
 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 {
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
@@ -131,9 +155,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
         * Mask out all the ones which do not need the timer, or in
         * other words keep all the ones that could need the timer.
         */
-       enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
-                 config_mask(I915_PMU_REQUESTED_FREQUENCY) |
-                 ENGINE_SAMPLE_MASK;
+       enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
        /*
         * When the GPU is idle per-engine counters do not need to be
@@ -175,9 +197,37 @@ static inline s64 ktime_since_raw(const ktime_t kt)
        return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
+static unsigned int
+__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+       unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
+
+       GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
+
+       return idx;
+}
+
+static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+       return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+}
+
+static void
+store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
+{
+       pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+}
+
+static void
+add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
+{
+       pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+}
+
 static u64 get_rc6(struct intel_gt *gt)
 {
        struct drm_i915_private *i915 = gt->i915;
+       const unsigned int gt_id = gt->info.id;
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;
        bool awake = false;
@@ -192,7 +242,7 @@ static u64 get_rc6(struct intel_gt *gt)
        spin_lock_irqsave(&pmu->lock, flags);
 
        if (awake) {
-               pmu->sample[__I915_SAMPLE_RC6].cur = val;
+               store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
        } else {
                /*
                 * We think we are runtime suspended.
@@ -201,14 +251,14 @@ static u64 get_rc6(struct intel_gt *gt)
                 * on top of the last known real value, as the approximated RC6
                 * counter value.
                 */
-               val = ktime_since_raw(pmu->sleep_last);
-               val += pmu->sample[__I915_SAMPLE_RC6].cur;
+               val = ktime_since_raw(pmu->sleep_last[gt_id]);
+               val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
        }
 
-       if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
-               val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
+       if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
+               val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
        else
-               pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
+               store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
 
        spin_unlock_irqrestore(&pmu->lock, flags);
 
@@ -218,13 +268,20 @@ static u64 get_rc6(struct intel_gt *gt)
 static void init_rc6(struct i915_pmu *pmu)
 {
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
-       intel_wakeref_t wakeref;
+       struct intel_gt *gt;
+       unsigned int i;
+
+       for_each_gt(gt, i915, i) {
+               intel_wakeref_t wakeref;
+
+               with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+                       u64 val = __get_rc6(gt);
 
-       with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
-               pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
-               pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
-                                       pmu->sample[__I915_SAMPLE_RC6].cur;
-               pmu->sleep_last = ktime_get_raw();
+                       store_sample(pmu, i, __I915_SAMPLE_RC6, val);
+                       store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
+                                    val);
+                       pmu->sleep_last[i] = ktime_get_raw();
+               }
        }
 }
 
@@ -232,8 +289,8 @@ static void park_rc6(struct intel_gt *gt)
 {
        struct i915_pmu *pmu = &gt->i915->pmu;
 
-       pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(gt);
-       pmu->sleep_last = ktime_get_raw();
+       store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+       pmu->sleep_last[gt->info.id] = ktime_get_raw();
 }
 
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
@@ -373,34 +430,30 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
        }
 }
 
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
-       sample->cur += mul_u32_u32(val, mul);
-}
-
-static bool frequency_sampling_enabled(struct i915_pmu *pmu)
+static bool
+frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
 {
        return pmu->enable &
-              (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
-               config_mask(I915_PMU_REQUESTED_FREQUENCY));
+              (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
+               config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
 }
 
 static void
 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 {
        struct drm_i915_private *i915 = gt->i915;
+       const unsigned int gt_id = gt->info.id;
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_rps *rps = &gt->rps;
 
-       if (!frequency_sampling_enabled(pmu))
+       if (!frequency_sampling_enabled(pmu, gt_id))
                return;
 
        /* Report 0/0 (actual/requested) frequency while parked. */
        if (!intel_gt_pm_get_if_awake(gt))
                return;
 
-       if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+       if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
                u32 val;
 
                /*
@@ -416,12 +469,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
                if (!val)
                        val = intel_gpu_freq(rps, rps->cur_freq);
 
-               add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
+               add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
                                val, period_ns / 1000);
        }
 
-       if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-               add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
+       if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
+               add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
                                intel_rps_get_requested_frequency(rps),
                                period_ns / 1000);
        }
@@ -458,9 +511,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
                        continue;
 
                engines_sample(gt, period_ns);
-
-               if (i == 0) /* FIXME */
-                       frequency_sample(gt, period_ns);
+               frequency_sample(gt, period_ns);
        }
 
        hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
@@ -502,7 +553,13 @@ config_status(struct drm_i915_private *i915, u64 config)
 {
        struct intel_gt *gt = to_gt(i915);
 
-       switch (config) {
+       unsigned int gt_id = config_gt_id(config);
+       unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;
+
+       if (gt_id > max_gt_id)
+               return -ENOENT;
+
+       switch (config_counter(config)) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
                        /* Requires a mutex for sampling! */
@@ -513,6 +570,8 @@ config_status(struct drm_i915_private *i915, u64 config)
                        return -ENODEV;
                break;
        case I915_PMU_INTERRUPTS:
+               if (gt_id)
+                       return -ENOENT;
                break;
        case I915_PMU_RC6_RESIDENCY:
                if (!gt->rc6.supported)
@@ -610,22 +669,27 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
-               switch (event->attr.config) {
+               const unsigned int gt_id = config_gt_id(event->attr.config);
+               const u64 config = config_counter(event->attr.config);
+
+               switch (config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =
-                          div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
+                          div_u64(read_sample(pmu, gt_id,
+                                              __I915_SAMPLE_FREQ_ACT),
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_REQUESTED_FREQUENCY:
                        val =
-                          div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
+                          div_u64(read_sample(pmu, gt_id,
+                                              __I915_SAMPLE_FREQ_REQ),
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_INTERRUPTS:
                        val = READ_ONCE(pmu->irq_count);
                        break;
                case I915_PMU_RC6_RESIDENCY:
-                       val = get_rc6(to_gt(i915));
+                       val = get_rc6(i915->gt[gt_id]);
                        break;
                case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
                        val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
index 3a81126..33d80fb 100644 (file)
@@ -38,13 +38,16 @@ enum {
        __I915_NUM_PMU_SAMPLERS
 };
 
+#define I915_PMU_MAX_GTS 2
+
 /*
  * How many different events we track in the global PMU mask.
  *
  * It is also used to know to needed number of event reference counters.
  */
 #define I915_PMU_MASK_BITS \
-       (I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT)
+       (I915_ENGINE_SAMPLE_COUNT + \
+        I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT)
 
 #define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
 
@@ -124,11 +127,11 @@ struct i915_pmu {
         * Only global counters are held here, while the per-engine ones are in
         * struct intel_engine_cs.
         */
-       struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
+       struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
        /**
         * @sleep_last: Last time GT parked for RC6 estimation.
         */
-       ktime_t sleep_last;
+       ktime_t sleep_last[I915_PMU_MAX_GTS];
        /**
         * @irq_count: Number of interrupts
         *
index ba40855..f31dfac 100644 (file)
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+       (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+       ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
 
 #define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
 #define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
 
 #define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
 
+#define __I915_PMU_ACTUAL_FREQUENCY(gt)                ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt)     ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt)              ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt)           ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt)  ___I915_PMU_OTHER(gt, 4)
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255        /* table size 2k - maximum due to use