drm/i915: Track context current active time

author Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Fri, 1 Apr 2022 14:22:02 +0000 (15:22 +0100)

committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Tue, 5 Apr 2022 07:39:10 +0000 (08:39 +0100)
author Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fri, 1 Apr 2022 14:22:02 +0000 (15:22 +0100)
committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Tue, 5 Apr 2022 07:39:10 +0000 (08:39 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c

index d87145b..4070cb5 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -386,7 +386,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
         ce->ring = NULL;
         ce->ring_size = SZ_4K;
  
         ce->ring = NULL;
         ce->ring_size = SZ_4K;
  
-       ewma_runtime_init(&ce->runtime.avg);
+       ewma_runtime_init(&ce->stats.runtime.avg);
  
         ce->vm = i915_vm_get(engine->gt->vm);
  
  
         ce->vm = i915_vm_get(engine->gt->vm);
  
@@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent,
         child->parallel.parent = parent;
  }
  
         child->parallel.parent = parent;
  }
  
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+{
+       u64 total, active;
+
+       total = ce->stats.runtime.total;
+       if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+               total *= ce->engine->gt->clock_period_ns;
+
+       active = READ_ONCE(ce->stats.active);
+       if (active)
+               active = intel_context_clock() - active;
+
+       return total + active;
+}
+
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+       u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);
+
+       if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+               avg *= ce->engine->gt->clock_period_ns;
+
+       return avg;
+}
+
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  #include "selftest_context.c"
  #endif
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  #include "selftest_context.c"
  #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h

index d8c74bb..b7d3214 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -351,18 +351,13 @@ intel_context_clear_nopreempt(struct intel_context *ce)
         clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
  }
  
         clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
  }
  
-static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
-{
-       const u32 period = ce->engine->gt->clock_period_ns;
-
-       return READ_ONCE(ce->runtime.total) * period;
-}
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
  
  
-static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+static inline u64 intel_context_clock(void)
  {
  {
-       const u32 period = ce->engine->gt->clock_period_ns;
-
-       return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+       /* As we mix CS cycles with CPU clocks, use the raw monotonic clock. */
+       return ktime_get_raw_fast_ns();
  }
  
  #endif /* __INTEL_CONTEXT_H__ */
  }
  
  #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h

index 30cd81a..09f8254 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -35,6 +35,9 @@ struct intel_context_ops {
  #define COPS_HAS_INFLIGHT_BIT 0
  #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
  
  #define COPS_HAS_INFLIGHT_BIT 0
  #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
  
+#define COPS_RUNTIME_CYCLES_BIT 1
+#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)
+
         int (*alloc)(struct intel_context *ce);
  
         void (*ban)(struct intel_context *ce, struct i915_request *rq);
         int (*alloc)(struct intel_context *ce);
  
         void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -134,14 +137,19 @@ struct intel_context {
         } lrc;
         u32 tag; /* cookie passed to HW to track this context on submission */
  
         } lrc;
         u32 tag; /* cookie passed to HW to track this context on submission */
  
-       /* Time on GPU as tracked by the hw. */
-       struct {
-               struct ewma_runtime avg;
-               u64 total;
-               u32 last;
-               I915_SELFTEST_DECLARE(u32 num_underflow);
-               I915_SELFTEST_DECLARE(u32 max_underflow);
-       } runtime;
+       /** stats: Context GPU engine busyness tracking. */
+       struct intel_context_stats {
+               u64 active;
+
+               /* Time on GPU as tracked by the hw. */
+               struct {
+                       struct ewma_runtime avg;
+                       u64 total;
+                       u32 last;
+                       I915_SELFTEST_DECLARE(u32 num_underflow);
+                       I915_SELFTEST_DECLARE(u32 max_underflow);
+               } runtime;
+       } stats;
  
         unsigned int active_count; /* protected by timeline->mutex */
  
  
         unsigned int active_count; /* protected by timeline->mutex */
  
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c

index e181029..94d41a0 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -624,8 +624,6 @@ static void __execlists_schedule_out(struct i915_request * const rq,
                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
                 __set_bit(ccid - 1, &engine->context_tag);
         }
                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
                 __set_bit(ccid - 1, &engine->context_tag);
         }
-
-       lrc_update_runtime(ce);
         intel_engine_context_out(engine);
         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
         if (engine->fw_domain && !--engine->fw_active)
         intel_engine_context_out(engine);
         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
         if (engine->fw_domain && !--engine->fw_active)
@@ -2004,8 +2002,23 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
          * and merits a fresh timeslice. We reinstall the timer after
          * inspecting the queue to see if we need to resubmit.
          */
          * and merits a fresh timeslice. We reinstall the timer after
          * inspecting the queue to see if we need to resubmit.
          */
-       if (*prev != *execlists->active) /* elide lite-restores */
+       if (*prev != *execlists->active) { /* elide lite-restores */
+               /*
+                * Note the inherent discrepancy between the HW runtime,
+                * recorded as part of the context switch, and the CPU
+                * adjustment for active contexts. We have to hope that
+                * the delay in processing the CS event is very small
+                * and consistent. It works to our advantage to have
+                * the CPU adjustment _undershoot_ (i.e. start later than)
+                * the CS timestamp so we never overreport the runtime
+                * and correct ourselves later when updating from HW.
+                */
+               if (*prev)
+                       lrc_runtime_stop((*prev)->context);
+               if (*execlists->active)
+                       lrc_runtime_start((*execlists->active)->context);
                 new_timeslice(execlists);
                 new_timeslice(execlists);
+       }
  
         return inactive;
  }
  
         return inactive;
  }
@@ -2637,7 +2650,7 @@ unwind:
  }
  
  static const struct intel_context_ops execlists_context_ops = {
  }
  
  static const struct intel_context_ops execlists_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
  
         .alloc = execlists_context_alloc,
  
  
         .alloc = execlists_context_alloc,
  
@@ -3695,7 +3708,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
  }
  
  static const struct intel_context_ops virtual_context_ops = {
  }
  
  static const struct intel_context_ops virtual_context_ops = {
-       .flags = COPS_HAS_INFLIGHT,
+       .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
  
         .alloc = virtual_context_alloc,
  
  
         .alloc = virtual_context_alloc,
  
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c

index 0db822c..d5d1b04 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -161,6 +161,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
         if (gt->clock_frequency)
                 gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
  
         if (gt->clock_frequency)
                 gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
  
+       /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
+       if (GRAPHICS_VER(gt->i915) == 11)
+               gt->clock_period_ns = NSEC_PER_SEC / 13750000;
+
         GT_TRACE(gt,
                  "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
                  gt->clock_frequency / 1000,
         GT_TRACE(gt,
                  "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
                  gt->clock_frequency / 1000,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index dffef6a..3f83a90 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -778,7 +778,7 @@ static void init_common_regs(u32 * const regs,
                                            CTX_CTRL_RS_CTX_ENABLE);
         regs[CTX_CONTEXT_CONTROL] = ctl;
  
                                            CTX_CTRL_RS_CTX_ENABLE);
         regs[CTX_CONTEXT_CONTROL] = ctl;
  
-       regs[CTX_TIMESTAMP] = ce->runtime.last;
+       regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
  }
  
  static void init_wa_bb_regs(u32 * const regs,
  }
  
  static void init_wa_bb_regs(u32 * const regs,
@@ -1734,11 +1734,12 @@ err:
         }
  }
  
         }
  }
  
-static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
  {
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  {
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-       ce->runtime.num_underflow++;
-       ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+       stats->runtime.num_underflow++;
+       stats->runtime.max_underflow =
+               max_t(u32, stats->runtime.max_underflow, -dt);
  #endif
  }
  
  #endif
  }
  
@@ -1755,25 +1756,25 @@ static u32 lrc_get_runtime(const struct intel_context *ce)
  
  void lrc_update_runtime(struct intel_context *ce)
  {
  
  void lrc_update_runtime(struct intel_context *ce)
  {
+       struct intel_context_stats *stats = &ce->stats;
         u32 old;
         s32 dt;
  
         u32 old;
         s32 dt;
  
-       if (intel_context_is_barrier(ce))
+       old = stats->runtime.last;
+       stats->runtime.last = lrc_get_runtime(ce);
+       dt = stats->runtime.last - old;
+       if (!dt)
                 return;
  
                 return;
  
-       old = ce->runtime.last;
-       ce->runtime.last = lrc_get_runtime(ce);
-       dt = ce->runtime.last - old;
-
         if (unlikely(dt < 0)) {
                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
         if (unlikely(dt < 0)) {
                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
-                        old, ce->runtime.last, dt);
-               st_update_runtime_underflow(ce, dt);
+                        old, stats->runtime.last, dt);
+               st_runtime_underflow(stats, dt);
                 return;
         }
  
                 return;
         }
  
-       ewma_runtime_add(&ce->runtime.avg, dt);
-       ce->runtime.total += dt;
+       ewma_runtime_add(&stats->runtime.avg, dt);
+       stats->runtime.total += dt;
  }
  
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  }
  
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h

index 6e4f9f5..7371bb5 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -11,9 +11,10 @@
  #include <linux/bitfield.h>
  #include <linux/types.h>
  
  #include <linux/bitfield.h>
  #include <linux/types.h>
  
+#include "intel_context.h"
+
  struct drm_i915_gem_object;
  struct i915_gem_ww_ctx;
  struct drm_i915_gem_object;
  struct i915_gem_ww_ctx;
-struct intel_context;
  struct intel_engine_cs;
  struct intel_ring;
  struct kref;
  struct intel_engine_cs;
  struct intel_ring;
  struct kref;
@@ -120,4 +121,28 @@ static inline u32 lrc_desc_priority(int prio)
                 return GEN12_CTX_PRIORITY_NORMAL;
  }
  
                 return GEN12_CTX_PRIORITY_NORMAL;
  }
  
+static inline void lrc_runtime_start(struct intel_context *ce)
+{
+       struct intel_context_stats *stats = &ce->stats;
+
+       if (intel_context_is_barrier(ce))
+               return;
+
+       if (stats->active)
+               return;
+
+       WRITE_ONCE(stats->active, intel_context_clock());
+}
+
+static inline void lrc_runtime_stop(struct intel_context *ce)
+{
+       struct intel_context_stats *stats = &ce->stats;
+
+       if (!stats->active)
+               return;
+
+       lrc_update_runtime(ce);
+       WRITE_ONCE(stats->active, 0);
+}
+
  #endif /* __INTEL_LRC_H__ */
  #endif /* __INTEL_LRC_H__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c

index 21c29d3..6ba52ef 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1753,8 +1753,8 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
         if (IS_ERR(ce))
                 return PTR_ERR(ce);
  
         if (IS_ERR(ce))
                 return PTR_ERR(ce);
  
-       ce->runtime.num_underflow = 0;
-       ce->runtime.max_underflow = 0;
+       ce->stats.runtime.num_underflow = 0;
+       ce->stats.runtime.max_underflow = 0;
  
         do {
                 unsigned int loop = 1024;
  
         do {
                 unsigned int loop = 1024;
@@ -1792,11 +1792,11 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
                 intel_context_get_avg_runtime_ns(ce));
  
         err = 0;
                 intel_context_get_avg_runtime_ns(ce));
  
         err = 0;
-       if (ce->runtime.num_underflow) {
+       if (ce->stats.runtime.num_underflow) {
                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
                        engine->name,
                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
                        engine->name,
-                      ce->runtime.num_underflow,
-                      ce->runtime.max_underflow);
+                      ce->stats.runtime.num_underflow,
+                      ce->stats.runtime.max_underflow);
                 GEM_TRACE_DUMP();
                 err = -EOVERFLOW;
         }
                 GEM_TRACE_DUMP();
                 err = -EOVERFLOW;
         }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c

index f41eb4d..7d09f92 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -509,13 +509,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
                                 const char *header,
                                 const struct i915_gem_context_coredump *ctx)
  {
                                 const char *header,
                                 const struct i915_gem_context_coredump *ctx)
  {
-       const u32 period = to_gt(m->i915)->clock_period_ns;
-
         err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
                    header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
                    ctx->guilty, ctx->active,
         err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
                    header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
                    ctx->guilty, ctx->active,
-                  ctx->total_runtime * period,
-                  mul_u32_u32(ctx->avg_runtime, period));
+                  ctx->total_runtime, ctx->avg_runtime);
  }
  
  static struct i915_vma_coredump *
  }
  
  static struct i915_vma_coredump *
@@ -1364,8 +1361,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
         e->guilty = atomic_read(&ctx->guilty_count);
         e->active = atomic_read(&ctx->active_count);
  
         e->guilty = atomic_read(&ctx->guilty_count);
         e->active = atomic_read(&ctx->active_count);
  
-       e->total_runtime = rq->context->runtime.total;
-       e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+       e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
+       e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
  
         simulated = i915_gem_context_no_error_capture(ctx);
  
  
         simulated = i915_gem_context_no_error_capture(ctx);
  
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h

index 09159ff..72d8607 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -94,7 +94,7 @@ struct intel_engine_coredump {
                 char comm[TASK_COMM_LEN];
  
                 u64 total_runtime;
                 char comm[TASK_COMM_LEN];
  
                 u64 total_runtime;
-               u32 avg_runtime;
+               u64 avg_runtime;
  
                 pid_t pid;
                 int active;
  
                 pid_t pid;
                 int active;
author	Tvrtko Ursulin <tvrtko.ursulin@intel.com>
	Fri, 1 Apr 2022 14:22:02 +0000 (15:22 +0100)
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>
	Tue, 5 Apr 2022 07:39:10 +0000 (08:39 +0100)
drivers/gpu/drm/i915/gt/intel_context.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_context.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_context_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_execlists_submission.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/selftest_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gpu_error.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gpu_error.h		patch \| blob \| history