drm/i915/guc: Implement GuC context operations for new interface

author Matthew Brost <matthew.brost@intel.com>

Wed, 21 Jul 2021 21:50:49 +0000 (14:50 -0700)

committer John Harrison <John.C.Harrison@Intel.com>

Thu, 22 Jul 2021 17:07:08 +0000 (10:07 -0700)
author Matthew Brost <matthew.brost@intel.com>
Wed, 21 Jul 2021 21:50:49 +0000 (14:50 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Thu, 22 Jul 2021 17:07:08 +0000 (10:07 -0700)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c

index c1338441cc1d2e8e1bf17939bef5b3b6758ac645..090c13287d3e9665e2409232bf6ad2be2413f0c4 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  
         mutex_init(&ce->pin_mutex);
  
+       spin_lock_init(&ce->guc_state.lock);
+
+       ce->guc_id = GUC_INVALID_LRC_ID;
+       INIT_LIST_HEAD(&ce->guc_id_link);
+
         i915_active_init(&ce->active,
                          __intel_context_active, __intel_context_retire, 0);
  }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h

index 6d99631d19b9061161a080a2e876b00404fcb19c..606c480aec26421efb5df7ca30088ee42738f81b 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -96,6 +96,7 @@ struct intel_context {
  #define CONTEXT_BANNED                 6
  #define CONTEXT_FORCE_SINGLE_SUBMISSION        7
  #define CONTEXT_NOPREEMPT              8
+#define CONTEXT_LRCA_DIRTY             9
  
         struct {
                 u64 timeout_us;
@@ -138,14 +139,29 @@ struct intel_context {
  
         u8 wa_bb_page; /* if set, page num reserved for context workarounds */
  
+       struct {
+               /** lock: protects everything in guc_state */
+               spinlock_t lock;
+               /**
+                * sched_state: scheduling state of this context using GuC
+                * submission
+                */
+               u8 sched_state;
+       } guc_state;
+
         /* GuC scheduling state flags that do not require a lock. */
         atomic_t guc_sched_state_no_lock;
  
+       /* GuC LRC descriptor ID */
+       u16 guc_id;
+
+       /* GuC LRC descriptor reference count */
+       atomic_t guc_id_ref;
+
         /*
-        * GuC LRC descriptor ID - Not assigned in this patch but future patches
-        * in the series will.
+        * GuC ID link - in list when unpinned but guc_id still valid in GuC
          */
-       u16 guc_id;
+       struct list_head guc_id_link;
  };
  
  #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h

index 9548f4ade068da8f9abdf723f07828b9a96bcbbc..f785d0ed238f5c0b285eeae9c4890174ccf23994 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -87,7 +87,6 @@
  #define GEN11_CSB_WRITE_PTR_MASK       (GEN11_CSB_PTR_MASK << 0)
  
  #define MAX_CONTEXT_HW_ID      (1 << 21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID  (1 << 20) /* exclusive */
  #define GEN11_MAX_CONTEXT_HW_ID        (1 << 11) /* exclusive */
  /* in Gen12 ID 0x7FF is reserved to indicate idle */
  #define GEN12_MAX_CONTEXT_HW_ID        (GEN11_MAX_CONTEXT_HW_ID - 1)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h

index 8c7b92f699f1a3d905cd055ffbbfd40296b67434..7fd6c3e343e454e5e2e892a9c7771c9fef365a26 100644 (file)
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -7,6 +7,7 @@
  #define _INTEL_GUC_H_
  
  #include <linux/xarray.h>
+#include <linux/delay.h>
  
  #include "intel_uncore.h"
  #include "intel_guc_fw.h"
@@ -44,6 +45,14 @@ struct intel_guc {
                 void (*disable)(struct intel_guc *guc);
         } interrupts;
  
+       /*
+        * contexts_lock protects the pool of free guc ids and a linked list of
+        * guc ids available to be stolen
+        */
+       spinlock_t contexts_lock;
+       struct ida guc_ids;
+       struct list_head guc_id_list;
+
         bool submission_selected;
  
         struct i915_vma *ads_vma;
@@ -101,6 +110,41 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
                                  response_buf, response_buf_size, 0);
  }
  
+static inline int intel_guc_send_busy_loop(struct intel_guc *guc,
+                                          const u32 *action,
+                                          u32 len,
+                                          bool loop)
+{
+       int err;
+       unsigned int sleep_period_ms = 1;
+       bool not_atomic = !in_atomic() && !irqs_disabled();
+
+       /*
+        * FIXME: Have caller pass in if we are in an atomic context to avoid
+        * using in_atomic(). It is likely safe here as we check for irqs
+        * disabled which basically all the spin locks in the i915 do but
+        * regardless this should be cleaned up.
+        */
+
+       /* No sleeping with spin locks, just busy loop */
+       might_sleep_if(loop && not_atomic);
+
+retry:
+       err = intel_guc_send_nb(guc, action, len);
+       if (unlikely(err == -EBUSY && loop)) {
+               if (likely(not_atomic)) {
+                       if (msleep_interruptible(sleep_period_ms))
+                               return -EINTR;
+                       sleep_period_ms = sleep_period_ms << 1;
+               } else {
+                       cpu_relax();
+               }
+               goto retry;
+       }
+
+       return err;
+}
+
  static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
  {
         intel_guc_ct_event_handler(&guc->ct);
@@ -202,6 +246,9 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
  int intel_guc_reset_engine(struct intel_guc *guc,
                            struct intel_engine_cs *engine);
  
+int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
+                                         const u32 *msg, u32 len);
+
  void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
  
  #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c

index 83ec60ea3f89cc9042a2b1750abad871067aab2b..28ff82c5be45e0332800bd14d840a5e5a7539169 100644 (file)
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -928,6 +928,10 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
         case INTEL_GUC_ACTION_DEFAULT:
                 ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
                 break;
+       case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+               ret = intel_guc_deregister_done_process_msg(guc, payload,
+                                                           len);
+               break;
         default:
                 ret = -EOPNOTSUPP;
                 break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 53b4a5eb4a85ca3281c9dd5e396db6197b3feb42..463613a414d2539523bccabc9903aec5a9a4baaa 100644 (file)
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -13,7 +13,9 @@
  #include "gt/intel_gt.h"
  #include "gt/intel_gt_irq.h"
  #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
  #include "gt/intel_lrc.h"
+#include "gt/intel_lrc_reg.h"
  #include "gt/intel_mocs.h"
  #include "gt/intel_ring.h"
  
@@ -85,6 +87,72 @@ static inline void clr_context_enabled(struct intel_context *ce)
                    &ce->guc_sched_state_no_lock);
  }
  
+/*
+ * Below is a set of functions which control the GuC scheduling state which
+ * require a lock, aside from the special case where the functions are called
+ * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
+ * path to be executing on the context.
+ */
+#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER    BIT(0)
+#define SCHED_STATE_DESTROYED                          BIT(1)
+static inline void init_sched_state(struct intel_context *ce)
+{
+       /* Only should be called from guc_lrc_desc_pin() */
+       atomic_set(&ce->guc_sched_state_no_lock, 0);
+       ce->guc_state.sched_state = 0;
+}
+
+static inline bool
+context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+       return ce->guc_state.sched_state &
+               SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline void
+set_context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+       /* Only should be called from guc_lrc_desc_pin() */
+       ce->guc_state.sched_state |=
+               SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline void
+clr_context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+       lockdep_assert_held(&ce->guc_state.lock);
+       ce->guc_state.sched_state &=
+               ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline bool
+context_destroyed(struct intel_context *ce)
+{
+       return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
+}
+
+static inline void
+set_context_destroyed(struct intel_context *ce)
+{
+       lockdep_assert_held(&ce->guc_state.lock);
+       ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
+}
+
+static inline bool context_guc_id_invalid(struct intel_context *ce)
+{
+       return ce->guc_id == GUC_INVALID_LRC_ID;
+}
+
+static inline void set_context_guc_id_invalid(struct intel_context *ce)
+{
+       ce->guc_id = GUC_INVALID_LRC_ID;
+}
+
+static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
+{
+       return &ce->engine->gt->uc.guc;
+}
+
  static inline struct i915_priolist *to_priolist(struct rb_node *rb)
  {
         return rb_entry(rb, struct i915_priolist, node);
@@ -155,6 +223,9 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
         int len = 0;
         bool enabled = context_enabled(ce);
  
+       GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
+       GEM_BUG_ON(context_guc_id_invalid(ce));
+
         if (!enabled) {
                 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
                 action[len++] = ce->guc_id;
@@ -417,6 +488,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
  
         xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
  
+       spin_lock_init(&guc->contexts_lock);
+       INIT_LIST_HEAD(&guc->guc_id_list);
+       ida_init(&guc->guc_ids);
+
         return 0;
  }
  
@@ -429,9 +504,308 @@ void intel_guc_submission_fini(struct intel_guc *guc)
         i915_sched_engine_put(guc->sched_engine);
  }
  
-static int guc_context_alloc(struct intel_context *ce)
+static inline void queue_request(struct i915_sched_engine *sched_engine,
+                                struct i915_request *rq,
+                                int prio)
  {
-       return lrc_alloc(ce, ce->engine);
+       GEM_BUG_ON(!list_empty(&rq->sched.link));
+       list_add_tail(&rq->sched.link,
+                     i915_sched_lookup_priolist(sched_engine, prio));
+       set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static int guc_bypass_tasklet_submit(struct intel_guc *guc,
+                                    struct i915_request *rq)
+{
+       int ret;
+
+       __i915_request_submit(rq);
+
+       trace_i915_request_in(rq, 0);
+
+       guc_set_lrc_tail(rq);
+       ret = guc_add_request(guc, rq);
+       if (ret == -EBUSY)
+               guc->stalled_request = rq;
+
+       return ret;
+}
+
+static void guc_submit_request(struct i915_request *rq)
+{
+       struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
+       struct intel_guc *guc = &rq->engine->gt->uc.guc;
+       unsigned long flags;
+
+       /* Will be called from irq-context when using foreign fences. */
+       spin_lock_irqsave(&sched_engine->lock, flags);
+
+       if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
+               queue_request(sched_engine, rq, rq_prio(rq));
+       else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
+               tasklet_hi_schedule(&sched_engine->tasklet);
+
+       spin_unlock_irqrestore(&sched_engine->lock, flags);
+}
+
+static int new_guc_id(struct intel_guc *guc)
+{
+       return ida_simple_get(&guc->guc_ids, 0,
+                             GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
+                             __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+}
+
+static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+       if (!context_guc_id_invalid(ce)) {
+               ida_simple_remove(&guc->guc_ids, ce->guc_id);
+               reset_lrc_desc(guc, ce->guc_id);
+               set_context_guc_id_invalid(ce);
+       }
+       if (!list_empty(&ce->guc_id_link))
+               list_del_init(&ce->guc_id_link);
+}
+
+static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&guc->contexts_lock, flags);
+       __release_guc_id(guc, ce);
+       spin_unlock_irqrestore(&guc->contexts_lock, flags);
+}
+
+static int steal_guc_id(struct intel_guc *guc)
+{
+       struct intel_context *ce;
+       int guc_id;
+
+       lockdep_assert_held(&guc->contexts_lock);
+
+       if (!list_empty(&guc->guc_id_list)) {
+               ce = list_first_entry(&guc->guc_id_list,
+                                     struct intel_context,
+                                     guc_id_link);
+
+               GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+               GEM_BUG_ON(context_guc_id_invalid(ce));
+
+               list_del_init(&ce->guc_id_link);
+               guc_id = ce->guc_id;
+               set_context_guc_id_invalid(ce);
+               return guc_id;
+       } else {
+               return -EAGAIN;
+       }
+}
+
+static int assign_guc_id(struct intel_guc *guc, u16 *out)
+{
+       int ret;
+
+       lockdep_assert_held(&guc->contexts_lock);
+
+       ret = new_guc_id(guc);
+       if (unlikely(ret < 0)) {
+               ret = steal_guc_id(guc);
+               if (ret < 0)
+                       return ret;
+       }
+
+       *out = ret;
+       return 0;
+}
+
+#define PIN_GUC_ID_TRIES       4
+static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+       int ret = 0;
+       unsigned long flags, tries = PIN_GUC_ID_TRIES;
+
+       GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+
+try_again:
+       spin_lock_irqsave(&guc->contexts_lock, flags);
+
+       if (context_guc_id_invalid(ce)) {
+               ret = assign_guc_id(guc, &ce->guc_id);
+               if (ret)
+                       goto out_unlock;
+               ret = 1;        /* Indicates newly assigned guc_id */
+       }
+       if (!list_empty(&ce->guc_id_link))
+               list_del_init(&ce->guc_id_link);
+       atomic_inc(&ce->guc_id_ref);
+
+out_unlock:
+       spin_unlock_irqrestore(&guc->contexts_lock, flags);
+
+       /*
+        * -EAGAIN indicates no guc_ids are available, let's retire any
+        * outstanding requests to see if that frees up a guc_id. If the first
+        * retire didn't help, insert a sleep with the timeslice duration before
+        * attempting to retire more requests. Double the sleep period each
+        * subsequent pass before finally giving up. The sleep period has max of
+        * 100ms and minimum of 1ms.
+        */
+       if (ret == -EAGAIN && --tries) {
+               if (PIN_GUC_ID_TRIES - tries > 1) {
+                       unsigned int timeslice_shifted =
+                               ce->engine->props.timeslice_duration_ms <<
+                               (PIN_GUC_ID_TRIES - tries - 2);
+                       unsigned int max = min_t(unsigned int, 100,
+                                                timeslice_shifted);
+
+                       msleep(max_t(unsigned int, max, 1));
+               }
+               intel_gt_retire_requests(guc_to_gt(guc));
+               goto try_again;
+       }
+
+       return ret;
+}
+
+static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+       unsigned long flags;
+
+       GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
+
+       if (unlikely(context_guc_id_invalid(ce)))
+               return;
+
+       spin_lock_irqsave(&guc->contexts_lock, flags);
+       if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
+           !atomic_read(&ce->guc_id_ref))
+               list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
+       spin_unlock_irqrestore(&guc->contexts_lock, flags);
+}
+
+static int __guc_action_register_context(struct intel_guc *guc,
+                                        u32 guc_id,
+                                        u32 offset)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_REGISTER_CONTEXT,
+               guc_id,
+               offset,
+       };
+
+       return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
+}
+
+static int register_context(struct intel_context *ce)
+{
+       struct intel_guc *guc = ce_to_guc(ce);
+       u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
+               ce->guc_id * sizeof(struct guc_lrc_desc);
+
+       return __guc_action_register_context(guc, ce->guc_id, offset);
+}
+
+static int __guc_action_deregister_context(struct intel_guc *guc,
+                                          u32 guc_id)
+{
+       u32 action[] = {
+               INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
+               guc_id,
+       };
+
+       return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
+}
+
+static int deregister_context(struct intel_context *ce, u32 guc_id)
+{
+       struct intel_guc *guc = ce_to_guc(ce);
+
+       return __guc_action_deregister_context(guc, guc_id);
+}
+
+static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
+{
+       switch (class) {
+       case RENDER_CLASS:
+               return mask >> RCS0;
+       case VIDEO_ENHANCEMENT_CLASS:
+               return mask >> VECS0;
+       case VIDEO_DECODE_CLASS:
+               return mask >> VCS0;
+       case COPY_ENGINE_CLASS:
+               return mask >> BCS0;
+       default:
+               MISSING_CASE(class);
+               return 0;
+       }
+}
+
+static void guc_context_policy_init(struct intel_engine_cs *engine,
+                                   struct guc_lrc_desc *desc)
+{
+       desc->policy_flags = 0;
+
+       desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
+       desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
+}
+
+static int guc_lrc_desc_pin(struct intel_context *ce)
+{
+       struct intel_engine_cs *engine = ce->engine;
+       struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
+       struct intel_guc *guc = &engine->gt->uc.guc;
+       u32 desc_idx = ce->guc_id;
+       struct guc_lrc_desc *desc;
+       bool context_registered;
+       intel_wakeref_t wakeref;
+       int ret = 0;
+
+       GEM_BUG_ON(!engine->mask);
+
+       /*
+        * Ensure LRC + CT vmas are in the same region as the write barrier is
+        * done based on the CT vma region.
+        */
+       GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+                  i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+       context_registered = lrc_desc_registered(guc, desc_idx);
+
+       reset_lrc_desc(guc, desc_idx);
+       set_lrc_desc_registered(guc, desc_idx, ce);
+
+       desc = __get_lrc_desc(guc, desc_idx);
+       desc->engine_class = engine_class_to_guc_class(engine->class);
+       desc->engine_submit_mask = adjust_engine_mask(engine->class,
+                                                     engine->mask);
+       desc->hw_context_desc = ce->lrc.lrca;
+       desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
+       desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+       guc_context_policy_init(engine, desc);
+       init_sched_state(ce);
+
+       /*
+        * The context_lookup xarray is used to determine if the hardware
+        * context is currently registered. There are two cases in which it
+        * could be registered: either the guc_id has been stolen from another
+        * context or the lrc descriptor address of this context has changed. In
+        * either case the context needs to be deregistered with the GuC before
+        * registering this context.
+        */
+       if (context_registered) {
+               set_context_wait_for_deregister_to_register(ce);
+               intel_context_get(ce);
+
+               /*
+                * If stealing the guc_id, this ce has the same guc_id as the
+                * context whose guc_id was stolen.
+                */
+               with_intel_runtime_pm(runtime_pm, wakeref)
+                       ret = deregister_context(ce, ce->guc_id);
+       } else {
+               with_intel_runtime_pm(runtime_pm, wakeref)
+                       ret = register_context(ce);
+       }
+
+       return ret;
  }
  
  static int guc_context_pre_pin(struct intel_context *ce,
@@ -443,36 +817,144 @@ static int guc_context_pre_pin(struct intel_context *ce,
  
  static int guc_context_pin(struct intel_context *ce, void *vaddr)
  {
+       if (i915_ggtt_offset(ce->state) !=
+           (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
+               set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
+
+       /*
+        * GuC context gets pinned in guc_request_alloc. See that function for
+        * an explanation of why.
+        */
+
         return lrc_pin(ce, ce->engine, vaddr);
  }
  
+static void guc_context_unpin(struct intel_context *ce)
+{
+       struct intel_guc *guc = ce_to_guc(ce);
+
+       unpin_guc_id(guc, ce);
+       lrc_unpin(ce);
+}
+
+static void guc_context_post_unpin(struct intel_context *ce)
+{
+       lrc_post_unpin(ce);
+}
+
+static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+{
+       struct intel_guc *guc = ce_to_guc(ce);
+       unsigned long flags;
+
+       GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
+       GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
+
+       spin_lock_irqsave(&ce->guc_state.lock, flags);
+       set_context_destroyed(ce);
+       spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+       deregister_context(ce, ce->guc_id);
+}
+
+static void guc_context_destroy(struct kref *kref)
+{
+       struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+       struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
+       struct intel_guc *guc = ce_to_guc(ce);
+       intel_wakeref_t wakeref;
+       unsigned long flags;
+
+       /*
+        * If the guc_id is invalid this context has been stolen and we can free
+        * it immediately. Also can be freed immediately if the context is not
+        * registered with the GuC.
+        */
+       if (context_guc_id_invalid(ce)) {
+               lrc_destroy(kref);
+               return;
+       } else if (!lrc_desc_registered(guc, ce->guc_id)) {
+               release_guc_id(guc, ce);
+               lrc_destroy(kref);
+               return;
+       }
+
+       /*
+        * We have to acquire the context spinlock and check guc_id again, if it
+        * is valid it hasn't been stolen and needs to be deregistered. We
+        * delete this context from the list of unpinned guc_ids available to
+        * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
+        * returns indicating this context has been deregistered the guc_id is
+        * returned to the pool of available guc_ids.
+        */
+       spin_lock_irqsave(&guc->contexts_lock, flags);
+       if (context_guc_id_invalid(ce)) {
+               spin_unlock_irqrestore(&guc->contexts_lock, flags);
+               lrc_destroy(kref);
+               return;
+       }
+
+       if (!list_empty(&ce->guc_id_link))
+               list_del_init(&ce->guc_id_link);
+       spin_unlock_irqrestore(&guc->contexts_lock, flags);
+
+       /*
+        * We defer GuC context deregistration until the context is destroyed
+        * in order to save on CTBs. With this optimization ideally we only need
+        * 1 CTB to register the context during the first pin and 1 CTB to
+        * deregister the context when the context is destroyed. Without this
+        * optimization, a CTB would be needed every pin & unpin.
+        *
+        * XXX: Need to acquire the runtime wakeref as this can be triggered
+        * from context_free_worker when runtime wakeref is not held.
+        * guc_lrc_desc_unpin requires the runtime as a GuC register is written
+        * in H2G CTB to deregister the context. A future patch may defer this
+        * H2G CTB if the runtime wakeref is zero.
+        */
+       with_intel_runtime_pm(runtime_pm, wakeref)
+               guc_lrc_desc_unpin(ce);
+}
+
+static int guc_context_alloc(struct intel_context *ce)
+{
+       return lrc_alloc(ce, ce->engine);
+}
+
  static const struct intel_context_ops guc_context_ops = {
         .alloc = guc_context_alloc,
  
         .pre_pin = guc_context_pre_pin,
         .pin = guc_context_pin,
-       .unpin = lrc_unpin,
-       .post_unpin = lrc_post_unpin,
+       .unpin = guc_context_unpin,
+       .post_unpin = guc_context_post_unpin,
  
         .enter = intel_context_enter_engine,
         .exit = intel_context_exit_engine,
  
         .reset = lrc_reset,
-       .destroy = lrc_destroy,
+       .destroy = guc_context_destroy,
  };
  
-static int guc_request_alloc(struct i915_request *request)
+static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
+{
+       return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
+               !lrc_desc_registered(ce_to_guc(ce), ce->guc_id);
+}
+
+static int guc_request_alloc(struct i915_request *rq)
  {
+       struct intel_context *ce = rq->context;
+       struct intel_guc *guc = ce_to_guc(ce);
         int ret;
  
-       GEM_BUG_ON(!intel_context_is_pinned(request->context));
+       GEM_BUG_ON(!intel_context_is_pinned(rq->context));
  
         /*
          * Flush enough space to reduce the likelihood of waiting after
          * we start building the request - in which case we will just
          * have to repeat work.
          */
-       request->reserved_space += GUC_REQUEST_SIZE;
+       rq->reserved_space += GUC_REQUEST_SIZE;
  
         /*
          * Note that after this point, we have committed to using
@@ -483,56 +965,47 @@ static int guc_request_alloc(struct i915_request *request)
          */
  
         /* Unconditionally invalidate GPU caches and TLBs. */
-       ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+       ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
         if (ret)
                 return ret;
  
-       request->reserved_space -= GUC_REQUEST_SIZE;
-       return 0;
-}
+       rq->reserved_space -= GUC_REQUEST_SIZE;
  
-static inline void queue_request(struct i915_sched_engine *sched_engine,
-                                struct i915_request *rq,
-                                int prio)
-{
-       GEM_BUG_ON(!list_empty(&rq->sched.link));
-       list_add_tail(&rq->sched.link,
-                     i915_sched_lookup_priolist(sched_engine, prio));
-       set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static int guc_bypass_tasklet_submit(struct intel_guc *guc,
-                                    struct i915_request *rq)
-{
-       int ret;
-
-       __i915_request_submit(rq);
-
-       trace_i915_request_in(rq, 0);
-
-       guc_set_lrc_tail(rq);
-       ret = guc_add_request(guc, rq);
-       if (ret == -EBUSY)
-               guc->stalled_request = rq;
-
-       return ret;
-}
-
-static void guc_submit_request(struct i915_request *rq)
-{
-       struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
-       struct intel_guc *guc = &rq->engine->gt->uc.guc;
-       unsigned long flags;
+       /*
+        * Call pin_guc_id here rather than in the pinning step as with
+        * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
+        * guc_ids and creating horrible race conditions. This is especially bad
+        * when guc_ids are being stolen due to over subscription. By the time
+        * this function is reached, it is guaranteed that the guc_id will be
+        * persistent until the generated request is retired. Thus, sealing these
+        * race conditions. It is still safe to fail here if guc_ids are
+        * exhausted and return -EAGAIN to the user indicating that they can try
+        * again in the future.
+        *
+        * There is no need for a lock here as the timeline mutex ensures at
+        * most one context can be executing this code path at once. The
+        * guc_id_ref is incremented once for every request in flight and
+        * decremented on each retire. When it is zero, a lock around the
+        * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
+        */
+       if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
+               return 0;
  
-       /* Will be called from irq-context when using foreign fences. */
-       spin_lock_irqsave(&sched_engine->lock, flags);
+       ret = pin_guc_id(guc, ce);      /* returns 1 if new guc_id assigned */
+       if (unlikely(ret < 0))
+               return ret;
+       if (context_needs_register(ce, !!ret)) {
+               ret = guc_lrc_desc_pin(ce);
+               if (unlikely(ret)) {    /* unwind */
+                       atomic_dec(&ce->guc_id_ref);
+                       unpin_guc_id(guc, ce);
+                       return ret;
+               }
+       }
  
-       if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
-               queue_request(sched_engine, rq, rq_prio(rq));
-       else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
-               tasklet_hi_schedule(&sched_engine->tasklet);
+       clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
  
-       spin_unlock_irqrestore(&sched_engine->lock, flags);
+       return 0;
  }
  
  static void sanitize_hwsp(struct intel_engine_cs *engine)
@@ -606,6 +1079,41 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
         engine->submit_request = guc_submit_request;
  }
  
+static inline void guc_kernel_context_pin(struct intel_guc *guc,
+                                         struct intel_context *ce)
+{
+       if (context_guc_id_invalid(ce))
+               pin_guc_id(guc, ce);
+       guc_lrc_desc_pin(ce);
+}
+
+static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       /* make sure all descriptors are clean... */
+       xa_destroy(&guc->context_lookup);
+
+       /*
+        * Some contexts might have been pinned before we enabled GuC
+        * submission, so we need to add them to the GuC bookkeeping.
+        * Also, after a reset of the GuC we want to make sure that the
+        * information shared with GuC is properly reset. The kernel LRCs are
+        * not attached to the gem_context, so they need to be added separately.
+        *
+        * Note: we purposefully do not check the return of guc_lrc_desc_pin,
+        * because that function can only fail if a reset is just starting. This
+        * is at the end of reset so presumably another reset isn't happening
+        * and even if it did this code would be run again.
+        */
+
+       for_each_engine(engine, gt, id)
+               if (engine->kernel_context)
+                       guc_kernel_context_pin(guc, engine->kernel_context);
+}
+
  static void guc_release(struct intel_engine_cs *engine)
  {
         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
@@ -718,6 +1226,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
  
  void intel_guc_submission_enable(struct intel_guc *guc)
  {
+       /*
+        * Clear guc->context_lookup and pin each engine's kernel context;
+        * all of that work is done by guc_init_lrc_mapping().
+        */
+       guc_init_lrc_mapping(guc);
  }
  
  void intel_guc_submission_disable(struct intel_guc *guc)
@@ -743,3 +1252,62 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
  {
         guc->submission_selected = __guc_submission_selected(guc);
  }
+
+/*
+ * Translate a descriptor index received in a G2H message into its context.
+ *
+ * Logs an error and returns NULL when @desc_idx is not below
+ * GUC_MAX_LRC_DESCRIPTORS or when __get_context() finds no context for it;
+ * otherwise returns the context.
+ */
+static inline struct intel_context *
+g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
+{
+       struct intel_context *found;
+
+       /* Reject indices outside the descriptor range before the lookup. */
+       if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
+               drm_err(&guc_to_gt(guc)->i915->drm,
+                       "Invalid desc_idx %u", desc_idx);
+               return NULL;
+       }
+
+       found = __get_context(guc, desc_idx);
+       if (unlikely(!found))
+               drm_err(&guc_to_gt(guc)->i915->drm,
+                       "Context is NULL, desc_idx %u", desc_idx);
+
+       /* NULL here when the lookup failed, which is what callers test for. */
+       return found;
+}
+
+/*
+ * Handle a G2H "deregister done" message.
+ *
+ * @guc: GuC instance the message was received on
+ * @msg: message payload; msg[0] is the descriptor index of the context
+ * @len: number of u32 entries available in @msg
+ *
+ * If the context was waiting for this deregistration before it could be
+ * registered, it is registered now (under a runtime PM wakeref), the wait
+ * flag is cleared and intel_context_put() is called on it. Otherwise, if the
+ * context has been destroyed, its guc_id is released and lrc_destroy() is
+ * called on it.
+ *
+ * Returns 0 on success, or -EPROTO when the message is empty or the
+ * descriptor index does not resolve to a context.
+ */
+int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
+                                         const u32 *msg,
+                                         u32 len)
+{
+       struct intel_context *ce;
+       u32 desc_idx;
+
+       if (unlikely(len < 1)) {
+               drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+               return -EPROTO;
+       }
+
+       /* Only touch the payload once it is known to hold one dword. */
+       desc_idx = msg[0];
+
+       ce = g2h_context_lookup(guc, desc_idx);
+       if (unlikely(!ce))
+               return -EPROTO;
+
+       if (context_wait_for_deregister_to_register(ce)) {
+               struct intel_runtime_pm *runtime_pm =
+                       &ce->engine->gt->i915->runtime_pm;
+               intel_wakeref_t wakeref;
+
+               /*
+                * The previous owner of this guc_id has been deregistered, so
+                * it is now safe to register this context.
+                */
+               with_intel_runtime_pm(runtime_pm, wakeref)
+                       register_context(ce);
+               clr_context_wait_for_deregister_to_register(ce);
+               intel_context_put(ce);
+       } else if (context_destroyed(ce)) {
+               /* Context has been destroyed */
+               release_guc_id(guc, ce);
+               lrc_destroy(&ce->ref);
+       }
+
+       return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index b956e3a63ca9617f6053fd03e64a9d32dd335ef8..1022400dd1bd26ffb41180caf1e75179f1dd9dd8 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4142,6 +4142,7 @@ enum {
         FAULT_AND_CONTINUE /* Unsupported */
  };
  
+#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
  #define GEN8_CTX_VALID (1 << 0)
  #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
  #define GEN8_CTX_FORCE_RESTORE (1 << 2)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index d3de9f60e03aa9a5622df44d36d35b0ccd3aa643..d35d7c96839df29e8f53a1b2f8738bc8befb2d5a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -407,6 +407,7 @@ bool i915_request_retire(struct i915_request *rq)
          */
         if (!list_empty(&rq->sched.link))
                 remove_from_engine(rq);
+       atomic_dec(&rq->context->guc_id_ref);
         GEM_BUG_ON(!llist_empty(&rq->execute_cb));
  
         __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
author	Matthew Brost <matthew.brost@intel.com>
	Wed, 21 Jul 2021 21:50:49 +0000 (14:50 -0700)
committer	John Harrison <John.C.Harrison@Intel.com>
	Thu, 22 Jul 2021 17:07:08 +0000 (10:07 -0700)
drivers/gpu/drm/i915/gt/intel_context.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_context_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc_reg.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/uc/intel_guc.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_reg.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history