// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */
6 #include <linux/circ_buf.h>
8 #include "gem/i915_gem_context.h"
9 #include "gem/i915_gem_lmem.h"
10 #include "gt/gen8_engine_cs.h"
11 #include "gt/intel_breadcrumbs.h"
12 #include "gt/intel_context.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_engine_pm.h"
15 #include "gt/intel_engine_regs.h"
16 #include "gt/intel_gpu_commands.h"
17 #include "gt/intel_gt.h"
18 #include "gt/intel_gt_clock_utils.h"
19 #include "gt/intel_gt_irq.h"
20 #include "gt/intel_gt_pm.h"
21 #include "gt/intel_gt_regs.h"
22 #include "gt/intel_gt_requests.h"
23 #include "gt/intel_lrc.h"
24 #include "gt/intel_lrc_reg.h"
25 #include "gt/intel_mocs.h"
26 #include "gt/intel_ring.h"
28 #include "intel_guc_ads.h"
29 #include "intel_guc_capture.h"
30 #include "intel_guc_print.h"
31 #include "intel_guc_submission.h"
35 #include "i915_trace.h"
/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after it
 * processes the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */
135 /* GuC Virtual Engine */
136 struct guc_virtual_engine {
137 struct intel_engine_cs base;
138 struct intel_context context;
/* Forward declaration: build a virtual context on top of @count siblings. */
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);
/*
 * Forward declaration of the parallel (multi-lrc) context constructor.
 *
 * NOTE(review): the last parameter line was missing from this listing and has
 * been restored as "unsigned int width" - confirm against the definition.
 */
static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);
#define GUC_REQUEST_SIZE 64 /* bytes */
/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)
/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock (the modifiers assert that ce->guc_state.lock is held).
 *
 * Bits 0-8 of ce->guc_state.sched_state are individual flags; the 12 bits
 * starting at SCHED_STATE_BLOCKED_SHIFT form a counter that is advanced in
 * units of SCHED_STATE_BLOCKED.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)
180 static inline void init_sched_state(struct intel_context *ce)
182 lockdep_assert_held(&ce->guc_state.lock);
183 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)
197 static bool sched_state_is_init(struct intel_context *ce)
199 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
203 context_wait_for_deregister_to_register(struct intel_context *ce)
205 return ce->guc_state.sched_state &
206 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
210 set_context_wait_for_deregister_to_register(struct intel_context *ce)
212 lockdep_assert_held(&ce->guc_state.lock);
213 ce->guc_state.sched_state |=
214 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
218 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
220 lockdep_assert_held(&ce->guc_state.lock);
221 ce->guc_state.sched_state &=
222 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
226 context_destroyed(struct intel_context *ce)
228 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
232 set_context_destroyed(struct intel_context *ce)
234 lockdep_assert_held(&ce->guc_state.lock);
235 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
238 static inline bool context_pending_disable(struct intel_context *ce)
240 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
243 static inline void set_context_pending_disable(struct intel_context *ce)
245 lockdep_assert_held(&ce->guc_state.lock);
246 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
249 static inline void clr_context_pending_disable(struct intel_context *ce)
251 lockdep_assert_held(&ce->guc_state.lock);
252 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
255 static inline bool context_banned(struct intel_context *ce)
257 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
260 static inline void set_context_banned(struct intel_context *ce)
262 lockdep_assert_held(&ce->guc_state.lock);
263 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
266 static inline void clr_context_banned(struct intel_context *ce)
268 lockdep_assert_held(&ce->guc_state.lock);
269 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
272 static inline bool context_enabled(struct intel_context *ce)
274 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
277 static inline void set_context_enabled(struct intel_context *ce)
279 lockdep_assert_held(&ce->guc_state.lock);
280 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
283 static inline void clr_context_enabled(struct intel_context *ce)
285 lockdep_assert_held(&ce->guc_state.lock);
286 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
289 static inline bool context_pending_enable(struct intel_context *ce)
291 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
294 static inline void set_context_pending_enable(struct intel_context *ce)
296 lockdep_assert_held(&ce->guc_state.lock);
297 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
300 static inline void clr_context_pending_enable(struct intel_context *ce)
302 lockdep_assert_held(&ce->guc_state.lock);
303 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
306 static inline bool context_registered(struct intel_context *ce)
308 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
311 static inline void set_context_registered(struct intel_context *ce)
313 lockdep_assert_held(&ce->guc_state.lock);
314 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
317 static inline void clr_context_registered(struct intel_context *ce)
319 lockdep_assert_held(&ce->guc_state.lock);
320 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
323 static inline bool context_policy_required(struct intel_context *ce)
325 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
328 static inline void set_context_policy_required(struct intel_context *ce)
330 lockdep_assert_held(&ce->guc_state.lock);
331 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
334 static inline void clr_context_policy_required(struct intel_context *ce)
336 lockdep_assert_held(&ce->guc_state.lock);
337 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
340 static inline bool context_close_done(struct intel_context *ce)
342 return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
345 static inline void set_context_close_done(struct intel_context *ce)
347 lockdep_assert_held(&ce->guc_state.lock);
348 ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
351 static inline u32 context_blocked(struct intel_context *ce)
353 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
354 SCHED_STATE_BLOCKED_SHIFT;
357 static inline void incr_context_blocked(struct intel_context *ce)
359 lockdep_assert_held(&ce->guc_state.lock);
361 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
363 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
366 static inline void decr_context_blocked(struct intel_context *ce)
368 lockdep_assert_held(&ce->guc_state.lock);
370 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
372 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
375 static struct intel_context *
376 request_to_scheduling_context(struct i915_request *rq)
378 return intel_context_to_parent(rq->context);
381 static inline bool context_guc_id_invalid(struct intel_context *ce)
383 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
386 static inline void set_context_guc_id_invalid(struct intel_context *ce)
388 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
391 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
393 return &ce->engine->gt->uc.guc;
396 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
398 return rb_entry(rb, struct i915_priolist, node);
/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)
421 struct sync_semaphore {
423 u8 unused[CACHELINE_BYTES - sizeof(u32)];
426 struct parent_scratch {
428 struct guc_sched_wq_desc wq_desc;
429 struct guc_process_desc_v69 pdesc;
432 struct sync_semaphore go;
433 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
435 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
436 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
438 u32 wq[WQ_SIZE / sizeof(u32)];
441 static u32 __get_parent_scratch_offset(struct intel_context *ce)
443 GEM_BUG_ON(!ce->parallel.guc.parent_page);
445 return ce->parallel.guc.parent_page * PAGE_SIZE;
448 static u32 __get_wq_offset(struct intel_context *ce)
450 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
452 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
455 static struct parent_scratch *
456 __get_parent_scratch(struct intel_context *ce)
458 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
459 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
462 * Need to subtract LRC_STATE_OFFSET here as the
463 * parallel.guc.parent_page is the offset into ce->state while
464 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
466 return (struct parent_scratch *)
468 ((__get_parent_scratch_offset(ce) -
469 LRC_STATE_OFFSET) / sizeof(u32)));
472 static struct guc_process_desc_v69 *
473 __get_process_desc_v69(struct intel_context *ce)
475 struct parent_scratch *ps = __get_parent_scratch(ce);
477 return &ps->descs.pdesc;
480 static struct guc_sched_wq_desc *
481 __get_wq_desc_v70(struct intel_context *ce)
483 struct parent_scratch *ps = __get_parent_scratch(ce);
485 return &ps->descs.wq_desc;
488 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
491 * Check for space in work queue. Caching a value of head pointer in
492 * intel_context structure in order reduce the number accesses to shared
493 * GPU memory which may be across a PCIe bus.
495 #define AVAILABLE_SPACE \
496 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
497 if (wqi_size > AVAILABLE_SPACE) {
498 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
500 if (wqi_size > AVAILABLE_SPACE)
503 #undef AVAILABLE_SPACE
505 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
508 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
510 struct intel_context *ce = xa_load(&guc->context_lookup, id);
512 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
517 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
519 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
524 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
529 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
534 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
536 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
537 (void **)&guc->lrc_desc_pool_vaddr_v69);
544 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
546 if (!guc->lrc_desc_pool_vaddr_v69)
549 guc->lrc_desc_pool_vaddr_v69 = NULL;
550 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
553 static inline bool guc_submission_initialized(struct intel_guc *guc)
555 return guc->submission_initialized;
558 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
560 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
563 memset(desc, 0, sizeof(*desc));
566 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
568 return __get_context(guc, id);
571 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
572 struct intel_context *ce)
577 * xarray API doesn't have xa_save_irqsave wrapper, so calling the
578 * lower level functions directly.
580 xa_lock_irqsave(&guc->context_lookup, flags);
581 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
582 xa_unlock_irqrestore(&guc->context_lookup, flags);
585 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
589 if (unlikely(!guc_submission_initialized(guc)))
592 _reset_lrc_desc_v69(guc, id);
595 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
596 * the lower level functions directly.
598 xa_lock_irqsave(&guc->context_lookup, flags);
599 __xa_erase(&guc->context_lookup, id);
600 xa_unlock_irqrestore(&guc->context_lookup, flags);
603 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
605 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
606 wake_up_all(&guc->ct.wq);
609 static int guc_submission_send_busy_loop(struct intel_guc *guc,
616 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
617 * so we don't handle the case where we don't get a reply because we
618 * aborted the send due to the channel being busy.
620 GEM_BUG_ON(g2h_len_dw && !loop);
623 atomic_inc(&guc->outstanding_submission_g2h);
625 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
628 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
633 const int state = interruptible ?
634 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
638 GEM_BUG_ON(timeout < 0);
640 if (!atomic_read(wait_var))
647 prepare_to_wait(&guc->ct.wq, &wait, state);
649 if (!atomic_read(wait_var))
652 if (signal_pending_state(state, current)) {
662 timeout = io_schedule_timeout(timeout);
664 finish_wait(&guc->ct.wq, &wait);
666 return (timeout < 0) ? timeout : 0;
669 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
671 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
674 return intel_guc_wait_for_pending_msg(guc,
675 &guc->outstanding_submission_g2h,
679 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
680 static int try_context_registration(struct intel_context *ce, bool loop);
682 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
685 struct intel_context *ce = request_to_scheduling_context(rq);
691 lockdep_assert_held(&rq->engine->sched_engine->lock);
694 * Corner case where requests were sitting in the priority list or a
695 * request resubmitted after the context was banned.
697 if (unlikely(!intel_context_is_schedulable(ce))) {
698 i915_request_put(i915_request_mark_eio(rq));
699 intel_engine_signal_breadcrumbs(ce->engine);
703 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
704 GEM_BUG_ON(context_guc_id_invalid(ce));
706 if (context_policy_required(ce)) {
707 err = guc_context_policy_init_v70(ce, false);
712 spin_lock(&ce->guc_state.lock);
715 * The request / context will be run on the hardware when scheduling
716 * gets enabled in the unblock. For multi-lrc we still submit the
717 * context to move the LRC tails.
719 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
722 enabled = context_enabled(ce) || context_blocked(ce);
725 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
726 action[len++] = ce->guc_id.id;
727 action[len++] = GUC_CONTEXT_ENABLE;
728 set_context_pending_enable(ce);
729 intel_context_get(ce);
730 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
732 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
733 action[len++] = ce->guc_id.id;
736 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
737 if (!enabled && !err) {
738 trace_intel_context_sched_enable(ce);
739 atomic_inc(&guc->outstanding_submission_g2h);
740 set_context_enabled(ce);
743 * Without multi-lrc KMD does the submission step (moving the
744 * lrc tail) so enabling scheduling is sufficient to submit the
745 * context. This isn't the case in multi-lrc submission as the
746 * GuC needs to move the tails, hence the need for another H2G
747 * to submit a multi-lrc context after enabling scheduling.
749 if (intel_context_is_parent(ce)) {
750 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
751 err = intel_guc_send_nb(guc, action, len - 1, 0);
753 } else if (!enabled) {
754 clr_context_pending_enable(ce);
755 intel_context_put(ce);
758 trace_i915_request_guc_submit(rq);
761 spin_unlock(&ce->guc_state.lock);
765 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
767 int ret = __guc_add_request(guc, rq);
769 if (unlikely(ret == -EBUSY)) {
770 guc->stalled_request = rq;
771 guc->submission_stall_reason = STALL_ADD_REQUEST;
777 static inline void guc_set_lrc_tail(struct i915_request *rq)
779 rq->context->lrc_reg_state[CTX_RING_TAIL] =
780 intel_ring_set_tail(rq->ring, rq->tail);
783 static inline int rq_prio(const struct i915_request *rq)
785 return rq->sched.attr.priority;
788 static bool is_multi_lrc_rq(struct i915_request *rq)
790 return intel_context_is_parallel(rq->context);
793 static bool can_merge_rq(struct i915_request *rq,
794 struct i915_request *last)
796 return request_to_scheduling_context(rq) ==
797 request_to_scheduling_context(last);
800 static u32 wq_space_until_wrap(struct intel_context *ce)
802 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
805 static void write_wqi(struct intel_context *ce, u32 wqi_size)
807 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
810 * Ensure WQI are visible before updating tail
812 intel_guc_write_barrier(ce_to_guc(ce));
814 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
816 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
819 static int guc_wq_noop_append(struct intel_context *ce)
821 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
822 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
827 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
829 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
830 FIELD_PREP(WQ_LEN_MASK, len_dw);
831 ce->parallel.guc.wqi_tail = 0;
836 static int __guc_wq_item_append(struct i915_request *rq)
838 struct intel_context *ce = request_to_scheduling_context(rq);
839 struct intel_context *child;
840 unsigned int wqi_size = (ce->parallel.number_children + 4) *
843 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
846 /* Ensure context is in correct state updating work queue */
847 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
848 GEM_BUG_ON(context_guc_id_invalid(ce));
849 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
850 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
852 /* Insert NOOP if this work queue item will wrap the tail pointer. */
853 if (wqi_size > wq_space_until_wrap(ce)) {
854 ret = guc_wq_noop_append(ce);
859 wqi = get_wq_pointer(ce, wqi_size);
863 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
865 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
866 FIELD_PREP(WQ_LEN_MASK, len_dw);
867 *wqi++ = ce->lrc.lrca;
868 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
869 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
870 *wqi++ = 0; /* fence_id */
871 for_each_child(ce, child)
872 *wqi++ = child->ring->tail / sizeof(u64);
874 write_wqi(ce, wqi_size);
879 static int guc_wq_item_append(struct intel_guc *guc,
880 struct i915_request *rq)
882 struct intel_context *ce = request_to_scheduling_context(rq);
885 if (unlikely(!intel_context_is_schedulable(ce)))
888 ret = __guc_wq_item_append(rq);
889 if (unlikely(ret == -EBUSY)) {
890 guc->stalled_request = rq;
891 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
897 static bool multi_lrc_submit(struct i915_request *rq)
899 struct intel_context *ce = request_to_scheduling_context(rq);
901 intel_ring_set_tail(rq->ring, rq->tail);
904 * We expect the front end (execbuf IOCTL) to set this flag on the last
905 * request generated from a multi-BB submission. This indicates to the
906 * backend (GuC interface) that we should submit this context thus
907 * submitting all the requests generated in parallel.
909 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
910 !intel_context_is_schedulable(ce);
913 static int guc_dequeue_one_context(struct intel_guc *guc)
915 struct i915_sched_engine * const sched_engine = guc->sched_engine;
916 struct i915_request *last = NULL;
921 lockdep_assert_held(&sched_engine->lock);
923 if (guc->stalled_request) {
925 last = guc->stalled_request;
927 switch (guc->submission_stall_reason) {
928 case STALL_REGISTER_CONTEXT:
929 goto register_context;
930 case STALL_MOVE_LRC_TAIL:
932 case STALL_ADD_REQUEST:
935 MISSING_CASE(guc->submission_stall_reason);
939 while ((rb = rb_first_cached(&sched_engine->queue))) {
940 struct i915_priolist *p = to_priolist(rb);
941 struct i915_request *rq, *rn;
943 priolist_for_each_request_consume(rq, rn, p) {
944 if (last && !can_merge_rq(rq, last))
945 goto register_context;
947 list_del_init(&rq->sched.link);
949 __i915_request_submit(rq);
951 trace_i915_request_in(rq, 0);
954 if (is_multi_lrc_rq(rq)) {
956 * We need to coalesce all multi-lrc requests in
957 * a relationship into a single H2G. We are
958 * guaranteed that all of these requests will be
959 * submitted sequentially.
961 if (multi_lrc_submit(rq)) {
963 goto register_context;
970 rb_erase_cached(&p->node, &sched_engine->queue);
971 i915_priolist_free(p);
976 struct intel_context *ce = request_to_scheduling_context(last);
978 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
979 intel_context_is_schedulable(ce))) {
980 ret = try_context_registration(ce, false);
981 if (unlikely(ret == -EPIPE)) {
983 } else if (ret == -EBUSY) {
984 guc->stalled_request = last;
985 guc->submission_stall_reason =
986 STALL_REGISTER_CONTEXT;
987 goto schedule_tasklet;
988 } else if (ret != 0) {
989 GEM_WARN_ON(ret); /* Unexpected */
995 if (is_multi_lrc_rq(last)) {
996 ret = guc_wq_item_append(guc, last);
998 goto schedule_tasklet;
999 } else if (ret != 0) {
1000 GEM_WARN_ON(ret); /* Unexpected */
1004 guc_set_lrc_tail(last);
1008 ret = guc_add_request(guc, last);
1009 if (unlikely(ret == -EPIPE)) {
1011 } else if (ret == -EBUSY) {
1012 goto schedule_tasklet;
1013 } else if (ret != 0) {
1014 GEM_WARN_ON(ret); /* Unexpected */
1019 guc->stalled_request = NULL;
1020 guc->submission_stall_reason = STALL_NONE;
1024 sched_engine->tasklet.callback = NULL;
1025 tasklet_disable_nosync(&sched_engine->tasklet);
1029 tasklet_schedule(&sched_engine->tasklet);
1033 static void guc_submission_tasklet(struct tasklet_struct *t)
1035 struct i915_sched_engine *sched_engine =
1036 from_tasklet(sched_engine, t, tasklet);
1037 unsigned long flags;
1040 spin_lock_irqsave(&sched_engine->lock, flags);
1043 loop = guc_dequeue_one_context(sched_engine->private_data);
1046 i915_sched_engine_reset_on_empty(sched_engine);
1048 spin_unlock_irqrestore(&sched_engine->lock, flags);
1051 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1053 if (iir & GT_RENDER_USER_INTERRUPT)
1054 intel_engine_signal_breadcrumbs(engine);
/* Forward declarations for helpers used by the scrub path below. */
static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);
1063 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1065 struct intel_context *ce;
1066 unsigned long index, flags;
1067 bool pending_disable, pending_enable, deregister, destroyed, banned;
1069 xa_lock_irqsave(&guc->context_lookup, flags);
1070 xa_for_each(&guc->context_lookup, index, ce) {
1072 * Corner case where the ref count on the object is zero but and
1073 * deregister G2H was lost. In this case we don't touch the ref
1074 * count and finish the destroy of the context.
1076 bool do_put = kref_get_unless_zero(&ce->ref);
1078 xa_unlock(&guc->context_lookup);
1080 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
1081 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
1082 /* successful cancel so jump straight to close it */
1083 intel_context_sched_disable_unpin(ce);
1086 spin_lock(&ce->guc_state.lock);
1089 * Once we are at this point submission_disabled() is guaranteed
1090 * to be visible to all callers who set the below flags (see above
1091 * flush and flushes in reset_prepare). If submission_disabled()
1092 * is set, the caller shouldn't set these flags.
1095 destroyed = context_destroyed(ce);
1096 pending_enable = context_pending_enable(ce);
1097 pending_disable = context_pending_disable(ce);
1098 deregister = context_wait_for_deregister_to_register(ce);
1099 banned = context_banned(ce);
1100 init_sched_state(ce);
1102 spin_unlock(&ce->guc_state.lock);
1104 if (pending_enable || destroyed || deregister) {
1105 decr_outstanding_submission_g2h(guc);
1107 guc_signal_context_fence(ce);
1109 intel_gt_pm_put_async(guc_to_gt(guc));
1110 release_guc_id(guc, ce);
1111 __guc_context_destroy(ce);
1113 if (pending_enable || deregister)
1114 intel_context_put(ce);
1117 /* Not mutualy exclusive with above if statement. */
1118 if (pending_disable) {
1119 guc_signal_context_fence(ce);
1121 guc_cancel_context_requests(ce);
1122 intel_engine_signal_breadcrumbs(ce->engine);
1124 intel_context_sched_disable_unpin(ce);
1125 decr_outstanding_submission_g2h(guc);
1127 spin_lock(&ce->guc_state.lock);
1128 guc_blocked_fence_complete(ce);
1129 spin_unlock(&ce->guc_state.lock);
1131 intel_context_put(ce);
1135 intel_context_put(ce);
1136 xa_lock(&guc->context_lookup);
1138 xa_unlock_irqrestore(&guc->context_lookup, flags);
1142 * GuC stores busyness stats for each engine at context in/out boundaries. A
1143 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1144 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1147 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1148 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1149 * active. For an active engine total busyness = total + (now - start), where
1150 * 'now' is the time at which the busyness is sampled. For inactive engine,
1151 * total busyness = total.
1153 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1155 * The start and total values provided by GuC are 32 bits and wrap around in a
1156 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1157 * increasing ns values, there is a need for this implementation to account for
1158 * overflows and extend the GuC provided values to 64 bits before returning
1159 * busyness to the user. In order to do that, a worker runs periodically at
1160 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
1161 * 27 seconds for a gt clock frequency of 19.2 MHz).
/*
 * WRAP_TIME_CLKS is the span of a 32-bit timestamp (U32_MAX gt clocks).
 * POLL_TIME_CLKS is one eighth of that span (>> 3); it bounds how far a
 * sampled start time may sit from the most recent gt_stamp.
 */
1164 #define WRAP_TIME_CLKS U32_MAX
1165 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
/*
 * Extend a 32-bit start timestamp to 64 bits using the upper half of
 * guc->timestamp.gt_stamp.
 *
 * guc:        supplies the 64-bit timestamp.gt_stamp reference value.
 * prev_start: in/out; its low 32 bits are compared with new_start and it is
 *             overwritten with (gt_stamp_hi << 32) | new_start.
 * new_start:  latest 32-bit start value.
 *
 * Both rollover checks use u32 subtraction, so the differences are taken
 * modulo 2^32 before being compared with POLL_TIME_CLKS.
 *
 * NOTE(review): the statements guarded by the three conditions are not
 * visible in this listing (presumably an early exit when the start is
 * unchanged and +/- adjustments of gt_stamp_hi) - confirm against the full
 * source.
 */
1168 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1170 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1171 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1173 if (new_start == lower_32_bits(*prev_start))
1177 * When gt is unparked, we update the gt timestamp and start the ping
1178 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1179 * is unparked, all switched in contexts will have a start time that is
1180 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1182 * If neither gt_stamp nor new_start has rolled over, then the
1183 * gt_stamp_hi does not need to be adjusted, however if one of them has
1184 * rolled over, we need to adjust gt_stamp_hi accordingly.
1186 * The below conditions address the cases of new_start rollover and
1187 * gt_stamp_last rollover respectively.
1189 if (new_start < gt_stamp_last &&
1190 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1193 if (new_start > gt_stamp_last &&
1194 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1197 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
/*
 * record_read(map_, field_): read one field of a struct
 * guc_engine_usage_record located at offset 0 of the given iosys_map.
 */
1200 #define record_read(map_, field_) \
1201 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1204 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1205 * we run into a race where the value read is inconsistent. Sometimes the
1206 * inconsistency is in reading the upper MSB bytes of the last_in value when
1207 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1208 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1209 * determine validity of these values. Instead we read the values multiple times
1210 * until they are consistent. In test runs, 3 attempts result in consistent
1211 * values. The upper bound is set to 6 attempts and may need to be tuned as per
1212 * any new occurrences.
/*
 * Read the engine's usage record into the three output parameters.
 *
 * engine:  engine whose usage-record mapping is read.
 * last_in: receives last_switch_in_stamp.
 * id:      receives current_context_index.
 * total:   receives total_runtime.
 *
 * After the three reads, each field is read again and compared with the
 * value just stored, to detect a record that changed mid-read.
 *
 * NOTE(review): the retry loop around the reads and its bound are not
 * visible in this listing - confirm against the full source.
 */
1214 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1215 u32 *last_in, u32 *id, u32 *total)
1217 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1221 *last_in = record_read(&rec_map, last_switch_in_stamp);
1222 *id = record_read(&rec_map, current_context_index);
1223 *total = record_read(&rec_map, total_runtime);
1225 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1226 record_read(&rec_map, current_context_index) == *id &&
1227 record_read(&rec_map, total_runtime) == *total)
/*
 * Refresh engine->stats.guc from the engine's usage record.
 *
 * The caller must hold guc->timestamp.lock (checked with lockdep).
 * The engine is marked running when the context id is not ~0 and the last
 * switch-in stamp is non-zero. total_gt_clks grows by the u32 difference
 * between the new total and prev_total, so a wrap of the 32-bit total is
 * absorbed by the modular subtraction. Totals of 0 or ~0 are ignored.
 *
 * NOTE(review): a condition guarding the __extend_last_switch() call may
 * have been dropped from this listing - confirm against the full source.
 */
1232 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1234 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1235 struct intel_guc *guc = &engine->gt->uc.guc;
1236 u32 last_switch, ctx_id, total;
1238 lockdep_assert_held(&guc->timestamp.lock);
1240 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1242 stats->running = ctx_id != ~0U && last_switch;
1244 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1247 * Instead of adjusting the total for overflow, just add the
1248 * difference from previous sample stats->total_gt_clks
1250 if (total && total != ~0U) {
1251 stats->total_gt_clks += (u32)(total - stats->prev_total);
1252 stats->prev_total = total;
/*
 * Derive the timestamp shift from RPM_CONFIG0.
 *
 * Reads RPM_CONFIG0 while holding a runtime-pm wakeref and extracts the
 * CTC shift parameter field (mask, then shift down).
 *
 * NOTE(review): the declarations of reg/shift and the return expression are
 * not visible in this listing - confirm how the field maps to the result.
 */
1256 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1258 intel_wakeref_t wakeref;
1261 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1262 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1264 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1265 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
/*
 * Refresh guc->timestamp.gt_stamp from the hardware timestamp.
 *
 * The caller must hold guc->timestamp.lock (checked with lockdep).
 * The 64-bit value read from MISC_STATUS0/MISC_STATUS1 is shifted right by
 * guc->timestamp.shift and only its low 32 bits are kept. The upper 32 bits
 * come from the previously stored gt_stamp. A new low word smaller than the
 * stored low word is treated as a wrap of the 32-bit counter.
 *
 * NOTE(review): the statement under the wrap check and any write to *now
 * are not visible in this listing - confirm against the full source.
 */
1270 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1272 struct intel_gt *gt = guc_to_gt(guc);
1273 u32 gt_stamp_lo, gt_stamp_hi;
1276 lockdep_assert_held(&guc->timestamp.lock);
1278 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1279 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1280 MISC_STATUS1) >> guc->timestamp.shift;
1281 gt_stamp_lo = lower_32_bits(gpm_ts);
1284 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1287 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1291 * Unlike the execlist mode of submission total and active times are in terms of
1292 * gt clocks. The *now parameter is retained to return the cpu time at which the
1293 * busyness was sampled.
/*
 * Return the engine's accumulated busy time as a ktime_t.
 *
 * Everything runs under guc->timestamp.lock. The reset count and the
 * I915_RESET_BACKOFF flag are sampled first. If no reset is in progress and
 * the gt is awake, the stats and gt_stamp are snapshotted and refreshed, and
 * the snapshot is restored if the reset count changed in the meantime.
 * The result is total_gt_clks converted to ns, plus (gt_stamp - start_gt_clk)
 * converted to ns when the engine is marked running.
 *
 * NOTE(review): several short lines (local declarations, braces, handling of
 * *now) are absent from this listing and are left as found.
 */
1295 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1297 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1298 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1299 struct intel_gt *gt = engine->gt;
1300 struct intel_guc *guc = &gt->uc.guc;
1301 u64 total, gt_stamp_saved;
1302 unsigned long flags;
1306 spin_lock_irqsave(&guc->timestamp.lock, flags);
1309 * If a reset happened, we risk reading partially updated engine
1310 * busyness from GuC, so we just use the driver stored copy of busyness.
1311 * Synchronize with gt reset using reset_count and the
1312 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1313 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1314 * usable by checking the flag afterwards.
1316 reset_count = i915_reset_count(gpu_error);
1317 in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
1322 * The active busyness depends on start_gt_clk and gt_stamp.
1323 * gt_stamp is updated by i915 only when gt is awake and the
1324 * start_gt_clk is derived from GuC state. To get a consistent
1325 * view of activity, we query the GuC state only if gt is awake.
1327 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1328 stats_saved = *stats;
1329 gt_stamp_saved = guc->timestamp.gt_stamp;
1331 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1332 * start_gt_clk' calculation below for active engines.
1334 guc_update_engine_gt_clks(engine);
1335 guc_update_pm_timestamp(guc, now);
1336 intel_gt_pm_put_async(gt);
1337 if (i915_reset_count(gpu_error) != reset_count) {
1338 *stats = stats_saved;
1339 guc->timestamp.gt_stamp = gt_stamp_saved;
1343 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1344 if (stats->running) {
1345 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1347 total += intel_gt_clock_interval_to_ns(gt, clk);
1350 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1352 return ns_to_ktime(total);
/*
 * (Re)arm the busyness worker: queue guc->timestamp.work on
 * system_highpri_wq to run after guc->timestamp.ping_delay.
 */
1355 static void guc_enable_busyness_worker(struct intel_guc *guc)
1357 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
/* Synchronously cancel the delayed busyness worker (guc->timestamp.work). */
1360 static void guc_cancel_busyness_worker(struct intel_guc *guc)
1362 cancel_delayed_work_sync(&guc->timestamp.work);
/*
 * Take a final busyness sample and clear the per-engine baseline.
 *
 * The worker is cancelled first. Then, under guc->timestamp.lock, the gt
 * timestamp is refreshed and every engine's clocks are updated once more.
 * Each engine's prev_total is then zeroed.
 *
 * NOTE(review): the declaration of 'unused' is not visible in this listing.
 */
1365 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1367 struct intel_gt *gt = guc_to_gt(guc);
1368 struct intel_engine_cs *engine;
1369 enum intel_engine_id id;
1370 unsigned long flags;
1373 guc_cancel_busyness_worker(guc);
1375 spin_lock_irqsave(&guc->timestamp.lock, flags);
1377 guc_update_pm_timestamp(guc, &unused);
1378 for_each_engine(engine, gt, id) {
1379 guc_update_engine_gt_clks(engine);
1380 engine->stats.guc.prev_total = 0;
1383 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
/*
 * Sample busyness for all engines.
 *
 * Records the sample time in guc->timestamp.last_stat_jiffies. Then, under
 * guc->timestamp.lock, the gt timestamp is refreshed and every engine's
 * clocks are updated.
 *
 * NOTE(review): the declaration of 'unused' is not visible in this listing.
 */
1386 static void __update_guc_busyness_stats(struct intel_guc *guc)
1388 struct intel_gt *gt = guc_to_gt(guc);
1389 struct intel_engine_cs *engine;
1390 enum intel_engine_id id;
1391 unsigned long flags;
1394 guc->timestamp.last_stat_jiffies = jiffies;
1396 spin_lock_irqsave(&guc->timestamp.lock, flags);
1398 guc_update_pm_timestamp(guc, &unused);
1399 for_each_engine(engine, gt, id)
1400 guc_update_engine_gt_clks(engine);
1402 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
/* Call lrc_update_runtime() for the context while holding guc->timestamp.lock. */
1405 static void __guc_context_update_stats(struct intel_context *ce)
1407 struct intel_guc *guc = ce_to_guc(ce);
1408 unsigned long flags;
1410 spin_lock_irqsave(&guc->timestamp.lock, flags);
1411 lrc_update_runtime(ce);
1412 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
/*
 * Update the context's runtime stats, pinning the context around the update.
 *
 * NOTE(review): the statement taken when intel_context_pin_if_active() fails
 * is not visible in this listing; presumably an early return - confirm.
 */
1415 static void guc_context_update_stats(struct intel_context *ce)
1417 if (!intel_context_pin_if_active(ce))
1420 __guc_context_update_stats(ce);
1421 intel_context_unpin(ce);
/*
 * Delayed-work handler that periodically samples busyness.
 *
 * Takes a runtime-pm wakeref only if runtime pm is already active, then
 * tries to take the gt reset lock without blocking. With both held it
 * updates the engine busyness stats and walks guc->context_lookup to update
 * each context's stats. It then releases the reset lock, re-arms the
 * worker and drops the wakeref.
 *
 * NOTE(review): the early-exit checks on 'wakeref' and 'ret', the labels and
 * some local declarations are absent from this listing and are left as found.
 */
1424 static void guc_timestamp_ping(struct work_struct *wrk)
1426 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1427 timestamp.work.work);
1428 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1429 struct intel_gt *gt = guc_to_gt(guc);
1430 struct intel_context *ce;
1431 intel_wakeref_t wakeref;
1432 unsigned long index;
1436 * Ideally the busyness worker should take a gt pm wakeref because the
1437 * worker only needs to be active while gt is awake. However, the
1438 * gt_park path cancels the worker synchronously and this complicates
1439 * the flow if the worker is also running at the same time. The cancel
1440 * waits for the worker and when the worker releases the wakeref, that
1441 * would call gt_park and would lead to a deadlock.
1443 * The resolution is to take the global pm wakeref if runtime pm is
1444 * already active. If not, we don't need to update the busyness stats as
1445 * the stats would already be updated when the gt was parked.
1448 * - We do not requeue the worker if we cannot take a reference to runtime
1449 * pm since intel_guc_busyness_unpark would requeue the worker in the
1452 * - If the gt was parked longer than time taken for GT timestamp to roll
1453 * over, we ignore those rollovers since we don't care about tracking
1454 * the exact GT time. We only care about roll overs when the gt is
1455 * active and running workloads.
1457 * - There is a window of time between gt_park and runtime suspend,
1458 * where the worker may run. This is acceptable since the worker will
1459 * not find any new data to update busyness.
1461 wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
1466 * Synchronize with gt reset to make sure the worker does not
1467 * corrupt the engine/guc stats. NB: can't actually block waiting
1468 * for a reset to complete as the reset requires flushing out
1469 * this worker thread if started. So waiting would deadlock.
1471 ret = intel_gt_reset_trylock(gt, &srcu);
1475 __update_guc_busyness_stats(guc);
1477 /* adjust context stats for overflow */
1478 xa_for_each(&guc->context_lookup, index, ce)
1479 guc_context_update_stats(ce);
1481 intel_gt_reset_unlock(gt, srcu);
1483 guc_enable_busyness_worker(guc);
1486 intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
/*
 * Send the INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF action to the GuC and return
 * the result of intel_guc_send().
 *
 * NOTE(review): the rest of the action array is not visible in this listing;
 * 'offset' (the engine usage offset) is presumably part of it - confirm.
 */
1489 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1491 u32 offset = intel_guc_engine_usage_offset(guc);
1493 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1498 return intel_guc_send(guc, action, ARRAY_SIZE(action));
/*
 * Enable GuC engine usage stats and start the busyness worker.
 *
 * The enable action is sent while holding a runtime-pm wakeref. A failure is
 * logged with guc_err().
 *
 * NOTE(review): the branch structure around the error message and the worker
 * start, and the return statement, are absent from this listing and are left
 * as found.
 */
1501 static int guc_init_engine_stats(struct intel_guc *guc)
1503 struct intel_gt *gt = guc_to_gt(guc);
1504 intel_wakeref_t wakeref;
1507 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1508 ret = guc_action_enable_usage_stats(guc);
1511 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
1513 guc_enable_busyness_worker(guc);
/* Tear down engine stats sampling by cancelling the busyness worker. */
1518 static void guc_fini_engine_stats(struct intel_guc *guc)
1520 guc_cancel_busyness_worker(guc);
/*
 * Stop busyness sampling when the gt parks.
 *
 * Does nothing useful unless GuC submission is initialized. The worker is
 * cancelled synchronously. A final sample is taken, except when the last
 * sample is no older than half of ping_delay.
 *
 * NOTE(review): the early-return statements under the two checks are absent
 * from this listing and are left as found.
 */
1523 void intel_guc_busyness_park(struct intel_gt *gt)
1525 struct intel_guc *guc = &gt->uc.guc;
1527 if (!guc_submission_initialized(guc))
1531 * There is a race with suspend flow where the worker runs after suspend
1532 * and causes an unclaimed register access warning. Cancel the worker
1533 * synchronously here.
1535 guc_cancel_busyness_worker(guc);
1538 * Before parking, we should sample engine busyness stats if we need to.
1539 * We can skip it if we are less than half a ping from the last time we
1540 * sampled the busyness stats.
1542 if (guc->timestamp.last_stat_jiffies &&
1543 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1544 (guc->timestamp.ping_delay / 2)))
1547 __update_guc_busyness_stats(guc);
/*
 * Resume busyness sampling when the gt unparks.
 *
 * Refreshes the gt timestamp under guc->timestamp.lock, then re-arms the
 * busyness worker.
 *
 * NOTE(review): the declaration of 'unused' and the statement under the
 * initialization check are absent from this listing and are left as found.
 */
1550 void intel_guc_busyness_unpark(struct intel_gt *gt)
1552 struct intel_guc *guc = &gt->uc.guc;
1553 unsigned long flags;
1556 if (!guc_submission_initialized(guc))
1559 spin_lock_irqsave(&guc->timestamp.lock, flags);
1560 guc_update_pm_timestamp(guc, &unused);
1561 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1562 guc_enable_busyness_worker(guc);
/*
 * Report whether submission is disabled. True when there is no sched_engine,
 * its tasklet is not enabled, or the gt is wedged. The result is marked
 * unlikely().
 */
1566 submission_disabled(struct intel_guc *guc)
1568 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1570 return unlikely(!sched_engine ||
1571 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1572 intel_gt_is_wedged(guc_to_gt(guc)));
/*
 * Disable the submission tasklet if it is currently enabled. Asserts that CT
 * is enabled, disables the tasklet and clears its callback.
 */
1575 static void disable_submission(struct intel_guc *guc)
1577 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1579 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1580 GEM_BUG_ON(!guc->ct.enabled);
1581 __tasklet_disable_sync_once(&sched_engine->tasklet);
1582 sched_engine->tasklet.callback = NULL;
/*
 * Re-enable the submission tasklet.
 *
 * Under sched_engine->lock: installs guc_submission_tasklet as the callback
 * and issues a write barrier. If the tasklet was not enabled and
 * __tasklet_enable() reports success, it is scheduled once to pick up any
 * request queued in the meantime.
 */
1586 static void enable_submission(struct intel_guc *guc)
1588 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1589 unsigned long flags;
1591 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1592 sched_engine->tasklet.callback = guc_submission_tasklet;
1593 wmb(); /* Make sure callback visible */
1594 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1595 __tasklet_enable(&sched_engine->tasklet)) {
1596 GEM_BUG_ON(!guc->ct.enabled);
1598 /* And kick in case we missed a new request submission. */
1599 tasklet_hi_schedule(&sched_engine->tasklet);
1601 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
/*
 * Acquire and immediately release sched_engine->lock. Any section that held
 * the lock when this was called has finished by the time it returns.
 */
1604 static void guc_flush_submissions(struct intel_guc *guc)
1606 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1607 unsigned long flags;
1609 spin_lock_irqsave(&sched_engine->lock, flags);
1610 spin_unlock_irqrestore(&sched_engine->lock, flags);
/* Forward declaration; the definition is not part of this listing. */
1613 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
/*
 * Quiesce GuC submission ahead of a reset.
 *
 * In order: park heartbeats, disable submission, disable GuC interrupts and
 * reset the busyness stats. Then flush the IRQ handler by cycling the gt
 * irq_lock, flush submissions and destroyed contexts, flush the CT request
 * worker, and finally scrub state for outstanding G2H.
 *
 * NOTE(review): the statement taken when submission is not initialized is
 * not visible in this listing; presumably an early return - confirm.
 */
1615 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1617 if (unlikely(!guc_submission_initialized(guc))) {
1618 /* Reset called during driver load? GuC not yet initialised! */
1622 intel_gt_park_heartbeats(guc_to_gt(guc));
1623 disable_submission(guc);
1624 guc->interrupts.disable(guc);
1625 __reset_guc_busyness_stats(guc);
1627 /* Flush IRQ handler */
1628 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1629 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1631 guc_flush_submissions(guc);
1632 guc_flush_destroyed_contexts(guc);
1633 flush_work(&guc->ct.requests.worker);
1635 scrub_guc_desc_for_outstanding_g2h(guc);
/*
 * Walk the engines selected by ve->mask, counting them, and match the one
 * whose zero-based position equals 'sibling'.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching engine, or NULL when none matches - confirm.
 */
1638 static struct intel_engine_cs *
1639 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1641 struct intel_engine_cs *engine;
1642 intel_engine_mask_t tmp, mask = ve->mask;
1643 unsigned int num_siblings = 0;
1645 for_each_engine_masked(engine, ve->gt, mask, tmp)
1646 if (num_siblings++ == sibling)
/*
 * Resolve the context's engine. For a virtual engine, sibling 0 is used
 * instead.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably 'engine' - confirm.
 */
1652 static inline struct intel_engine_cs *
1653 __context_to_physical_engine(struct intel_context *ce)
1655 struct intel_engine_cs *engine = ce->engine;
1657 if (intel_engine_is_virtual(engine))
1658 engine = guc_virtual_get_sibling(engine, 0);
/*
 * Rebuild the context's register state after a hang.
 *
 * ce:    context to reset; must be pinned (asserted).
 * head:  ring head passed to lrc_update_regs().
 * scrub: NOTE(review): whether the lrc_init_regs() call is conditional on
 *        this flag is not visible in this listing - confirm.
 *
 * NOTE(review): the statement taken for a non-schedulable context is also
 * not visible; presumably an early return - confirm.
 */
1663 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1665 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1667 if (!intel_context_is_schedulable(ce))
1670 GEM_BUG_ON(!intel_context_is_pinned(ce));
1673 * We want a simple context + ring to execute the breadcrumb update.
1674 * We cannot rely on the context being intact across the GPU hang,
1675 * so clear it and rebuild just what we need for the breadcrumb.
1676 * All pending requests for this context will be zapped, and any
1677 * future request will be after userspace has had the opportunity
1678 * to recreate its own state.
1681 lrc_init_regs(ce, engine, true);
1683 /* Rerun the request; its payload has been neutered (if guilty). */
1684 lrc_update_regs(ce, engine, head);
/*
 * Per-engine reset preparation. On MTL graphics steppings A0..B0, or on
 * graphics version >= 11 and below IP version 12.70, stop the command
 * streamer and wait for pending MI force wakeups.
 */
1687 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1690 * Wa_22011802037: In addition to stopping the cs, we need
1691 * to wait for any pending mi force wakeups
1693 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
1694 (GRAPHICS_VER(engine->i915) >= 11 &&
1695 GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
1696 intel_engine_stop_cs(engine);
1697 intel_engine_wait_for_pending_mi_fw(engine);
/* NOTE(review): body not visible in this listing; presumably empty given the _nop name - confirm. */
1701 static void guc_reset_nop(struct intel_engine_cs *engine)
/* NOTE(review): body not visible in this listing; presumably empty given the _nop name - confirm. */
1705 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
/*
 * Move the context's requests from ce->guc_state.requests back onto the
 * scheduler priority lists for later resubmission.
 *
 * The list is walked in reverse with sched_engine->lock (irqsave) and
 * ce->guc_state.lock held. Each handled request is unlinked, unsubmitted,
 * added to the priolist for its priority and flagged I915_FENCE_FLAG_PQUEUE.
 * The priolist lookup is repeated only when the priority differs from the
 * previous request's.
 *
 * NOTE(review): the statement taken for a completed request and the update
 * of 'prio' are not visible in this listing - confirm.
 */
1710 __unwind_incomplete_requests(struct intel_context *ce)
1712 struct i915_request *rq, *rn;
1713 struct list_head *pl;
1714 int prio = I915_PRIORITY_INVALID;
1715 struct i915_sched_engine * const sched_engine =
1716 ce->engine->sched_engine;
1717 unsigned long flags;
1719 spin_lock_irqsave(&sched_engine->lock, flags);
1720 spin_lock(&ce->guc_state.lock);
1721 list_for_each_entry_safe_reverse(rq, rn,
1722 &ce->guc_state.requests,
1724 if (i915_request_completed(rq))
1727 list_del_init(&rq->sched.link);
1728 __i915_request_unsubmit(rq);
1730 /* Push the request back into the queue for later resubmission. */
1731 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1732 if (rq_prio(rq) != prio) {
1734 pl = i915_sched_lookup_priolist(sched_engine, prio);
1736 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1738 list_add(&rq->sched.link, pl);
1739 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1741 spin_unlock(&ce->guc_state.lock);
1742 spin_unlock_irqrestore(&sched_engine->lock, flags);
/*
 * Reset a (parent or standalone) context and its children after a GPU reset.
 *
 * ce:      context to reset; must not be a child (asserted).
 * stalled: mask of stalled engines. A started request is treated as guilty
 *          when its context's engine is in this mask.
 *
 * Holds a reference on the context for the duration. The enabled state is
 * cleared under ce->guc_state.lock. The loop then visits the context plus
 * each of its number_children children. It looks up the active request,
 * picks the ring head to restart from, resets the request, and rebuilds the
 * context state. Finally incomplete requests are unwound on the parent.
 *
 * NOTE(review): the branch structure inside the loop (unpinned skip, the
 * no-active-request case, labels) is only partly visible in this listing -
 * confirm against the full source.
 */
1745 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1748 struct i915_request *rq;
1749 unsigned long flags;
1751 int i, number_children = ce->parallel.number_children;
1752 struct intel_context *parent = ce;
1754 GEM_BUG_ON(intel_context_is_child(ce));
1756 intel_context_get(ce);
1759 * GuC will implicitly mark the context as non-schedulable when it sends
1760 * the reset notification. Make sure our state reflects this change. The
1761 * context will be marked enabled on resubmission.
1763 spin_lock_irqsave(&ce->guc_state.lock, flags);
1764 clr_context_enabled(ce);
1765 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1768 * For each context in the relationship find the hanging request
1769 * resetting each context / request as needed
1771 for (i = 0; i < number_children + 1; ++i) {
1772 if (!intel_context_is_pinned(ce))
1776 rq = intel_context_get_active_request(ce);
1778 head = ce->ring->tail;
1782 if (i915_request_started(rq))
1783 guilty = stalled & ce->engine->mask;
1785 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1786 head = intel_ring_wrap(ce->ring, rq->head);
1788 __i915_request_reset(rq, guilty);
1789 i915_request_put(rq);
1791 guc_reset_state(ce, head, guilty);
1793 if (i != number_children)
1794 ce = list_next_entry(ce, parallel.child_link);
1797 __unwind_incomplete_requests(parent);
1798 intel_context_put(parent);
/*
 * Reset every context registered in guc->context_lookup.
 *
 * guc:     GuC whose contexts are reset.
 * stalled: mask of stalled engines, forwarded to __guc_reset_context().
 *
 * The xarray is walked with its lock taken irqsave. For each entry a
 * reference is attempted with kref_get_unless_zero(). The xarray lock is
 * dropped (without restoring irq flags) around the per-context work and
 * retaken afterwards. Only pinned, non-child contexts are reset. After the
 * walk the xarray is destroyed.
 *
 * NOTE(review): the statements taken when submission is not initialized and
 * when the reference cannot be taken are not visible in this listing -
 * confirm.
 */
1801 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1803 struct intel_context *ce;
1804 unsigned long index;
1805 unsigned long flags;
1807 if (unlikely(!guc_submission_initialized(guc))) {
1808 /* Reset called during driver load? GuC not yet initialised! */
1812 xa_lock_irqsave(&guc->context_lookup, flags);
1813 xa_for_each(&guc->context_lookup, index, ce) {
1814 if (!kref_get_unless_zero(&ce->ref))
1817 xa_unlock(&guc->context_lookup);
1819 if (intel_context_is_pinned(ce) &&
1820 !intel_context_is_child(ce))
1821 __guc_reset_context(ce, stalled);
1823 intel_context_put(ce);
1825 xa_lock(&guc->context_lookup);
1827 xa_unlock_irqrestore(&guc->context_lookup, flags);
1829 /* GuC is blown away, drop all references to contexts */
1830 xa_destroy(&guc->context_lookup);
/*
 * Mark every request on ce->guc_state.requests with i915_request_mark_eio()
 * and pass the result to i915_request_put(). Runs with sched_engine->lock
 * (irqsave) and ce->guc_state.lock held.
 */
1833 static void guc_cancel_context_requests(struct intel_context *ce)
1835 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1836 struct i915_request *rq;
1837 unsigned long flags;
1839 /* Mark all executing requests as skipped. */
1840 spin_lock_irqsave(&sched_engine->lock, flags);
1841 spin_lock(&ce->guc_state.lock);
1842 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1843 i915_request_put(i915_request_mark_eio(rq));
1844 spin_unlock(&ce->guc_state.lock);
1845 spin_unlock_irqrestore(&sched_engine->lock, flags);
/*
 * Drain the scheduler's priority queue.
 *
 * Under sched_engine->lock (irqsave), every priolist in the rb-tree is
 * consumed. Each request is unlinked, submitted, marked with
 * i915_request_mark_eio() and released. Each emptied priolist is erased and
 * freed. The queue is then reset to empty with queue_priority_hint INT_MIN.
 *
 * NOTE(review): the check described by the "during boot" remark and the
 * declaration of 'rb' are not visible in this listing - confirm.
 */
1849 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1851 struct i915_request *rq, *rn;
1853 unsigned long flags;
1855 /* Can be called during boot if GuC fails to load */
1860 * Before we call engine->cancel_requests(), we should have exclusive
1861 * access to the submission state. This is arranged for us by the
1862 * caller disabling the interrupt generation, the tasklet and other
1863 * threads that may then access the same state, giving us a free hand
1864 * to reset state. However, we still need to let lockdep be aware that
1865 * we know this state may be accessed in hardirq context, so we
1866 * disable the irq around this manipulation and we want to keep
1867 * the spinlock focused on its duties and not accidentally conflate
1868 * coverage to the submission's irq state. (Similarly, although we
1869 * shouldn't need to disable irq around the manipulation of the
1870 * submission's irq state, we also wish to remind ourselves that
1873 spin_lock_irqsave(&sched_engine->lock, flags);
1875 /* Flush the queued requests to the timeline list (for retiring). */
1876 while ((rb = rb_first_cached(&sched_engine->queue))) {
1877 struct i915_priolist *p = to_priolist(rb);
1879 priolist_for_each_request_consume(rq, rn, p) {
1880 list_del_init(&rq->sched.link);
1882 __i915_request_submit(rq);
1884 i915_request_put(i915_request_mark_eio(rq));
1887 rb_erase_cached(&p->node, &sched_engine->queue);
1888 i915_priolist_free(p);
1891 /* Remaining _unready_ requests will be nop'ed when submitted */
1893 sched_engine->queue_priority_hint = INT_MIN;
1894 sched_engine->queue = RB_ROOT_CACHED;
1896 spin_unlock_irqrestore(&sched_engine->lock, flags);
/*
 * Cancel all requests known to the GuC backend.
 *
 * guc->context_lookup is walked with its lock taken irqsave. A reference is
 * attempted per context, and the xarray lock is dropped (irq flags kept)
 * around the per-context work. Requests of pinned, non-child contexts are
 * cancelled. The scheduler queue is then drained and the xarray destroyed.
 *
 * NOTE(review): the statement taken when the reference cannot be obtained is
 * not visible in this listing; presumably 'continue' - confirm.
 */
1899 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1901 struct intel_context *ce;
1902 unsigned long index;
1903 unsigned long flags;
1905 xa_lock_irqsave(&guc->context_lookup, flags);
1906 xa_for_each(&guc->context_lookup, index, ce) {
1907 if (!kref_get_unless_zero(&ce->ref))
1910 xa_unlock(&guc->context_lookup);
1912 if (intel_context_is_pinned(ce) &&
1913 !intel_context_is_child(ce))
1914 guc_cancel_context_requests(ce);
1916 intel_context_put(ce);
1918 xa_lock(&guc->context_lookup);
1920 xa_unlock_irqrestore(&guc->context_lookup, flags);
1922 guc_cancel_sched_engine_requests(guc->sched_engine);
1924 /* GuC is blown away, drop all references to contexts */
1925 xa_destroy(&guc->context_lookup);
/*
 * Bring GuC submission back up after a reset.
 *
 * Warns if outstanding_submission_g2h is non-zero and clears it. Then
 * updates the global policies, re-enables submission and unparks the
 * heartbeats.
 *
 * NOTE(review): the statement taken when submission is uninitialized or the
 * gt is wedged is not visible in this listing; presumably an early return -
 * confirm.
 */
1928 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1930 /* Reset called during driver load or during wedge? */
1931 if (unlikely(!guc_submission_initialized(guc) ||
1932 intel_gt_is_wedged(guc_to_gt(guc)))) {
1937 * Technically possible for either of these values to be non-zero here,
1938 * but very unlikely + harmless. Regardless let's add a warn so we can
1939 * see in CI if this happens frequently / a precursor to taking down the
1942 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1943 atomic_set(&guc->outstanding_submission_g2h, 0);
1945 intel_guc_global_policies_update(guc);
1946 enable_submission(guc);
1947 intel_gt_unpark_heartbeats(guc_to_gt(guc));
/* Forward declarations of work handlers; their definitions are not part of this listing. */
1950 static void destroyed_worker_func(struct work_struct *w);
1951 static void reset_fail_worker_func(struct work_struct *w);
1954 * Set up the memory resources to be shared with the GuC (via the GGTT)
1955 * at firmware loading time.
/*
 * One-time initialization of GuC submission state.
 *
 * For GuC submit versions below 1.0.0 the v69 LRC descriptor pool is
 * created. A bitmap of NUMBER_MULTI_LRC_GUC_ID(guc) bits is allocated for
 * multi-LRC guc_ids. ping_delay is (POLL_TIME_CLKS / clock_frequency + 1)
 * scaled by HZ, i.e. the poll period rounded up to whole seconds, in
 * jiffies. The timestamp shift is then read and the state is marked
 * initialized.
 *
 * NOTE(review): return statements, error checks and the error-path labels
 * are not visible in this listing. The trailing pool destroy is presumably
 * the failure path - confirm.
 */
1957 int intel_guc_submission_init(struct intel_guc *guc)
1959 struct intel_gt *gt = guc_to_gt(guc);
1962 if (guc->submission_initialized)
1965 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
1966 ret = guc_lrc_desc_pool_create_v69(guc);
1971 guc->submission_state.guc_ids_bitmap =
1972 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1973 if (!guc->submission_state.guc_ids_bitmap) {
1978 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1979 guc->timestamp.shift = gpm_timestamp_shift(gt);
1980 guc->submission_initialized = true;
1985 guc_lrc_desc_pool_destroy_v69(guc);
/*
 * Tear down GuC submission state. Flushes destroyed contexts, destroys the
 * v69 descriptor pool, drops the sched_engine reference, frees the guc_id
 * bitmap and clears the initialized flag.
 *
 * NOTE(review): the statement taken when submission was never initialized is
 * not visible in this listing; presumably an early return - confirm.
 */
1990 void intel_guc_submission_fini(struct intel_guc *guc)
1992 if (!guc->submission_initialized)
1995 guc_flush_destroyed_contexts(guc);
1996 guc_lrc_desc_pool_destroy_v69(guc);
1997 i915_sched_engine_put(guc->sched_engine);
1998 bitmap_free(guc->submission_state.guc_ids_bitmap);
1999 guc->submission_initialized = false;
/*
 * Queue a request for the submission tasklet. The request must not already
 * be on a list (asserted). It is appended to the priolist for 'prio',
 * flagged I915_FENCE_FLAG_PQUEUE, and the tasklet is scheduled.
 */
2002 static inline void queue_request(struct i915_sched_engine *sched_engine,
2003 struct i915_request *rq,
2006 GEM_BUG_ON(!list_empty(&rq->sched.link));
2007 list_add_tail(&rq->sched.link,
2008 i915_sched_lookup_priolist(sched_engine, prio));
2009 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2010 tasklet_hi_schedule(&sched_engine->tasklet);
/*
 * Submit a request directly, without going through the tasklet.
 *
 * The request is submitted and traced. A multi-LRC request goes through the
 * work-queue append / add-request path when multi_lrc_submit() is true.
 * Otherwise the LRC tail is set and the request is added. A result of
 * -EPIPE disables submission.
 *
 * NOTE(review): the declaration/initial value of 'ret', the nested branch
 * structure and the return statement are not visible in this listing -
 * confirm.
 */
2013 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
2014 struct i915_request *rq)
2018 __i915_request_submit(rq);
2020 trace_i915_request_in(rq, 0);
2022 if (is_multi_lrc_rq(rq)) {
2023 if (multi_lrc_submit(rq)) {
2024 ret = guc_wq_item_append(guc, rq);
2026 ret = guc_add_request(guc, rq);
2029 guc_set_lrc_tail(rq);
2030 ret = guc_add_request(guc, rq);
2033 if (unlikely(ret == -EPIPE))
2034 disable_submission(guc);
/*
 * Decide whether a request must go through the tasklet. True when submission
 * is disabled, a stalled request exists, the scheduler queue is not empty,
 * or the scheduling context's guc_id is not mapped.
 */
2039 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
2041 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2042 struct intel_context *ce = request_to_scheduling_context(rq);
2044 return submission_disabled(guc) || guc->stalled_request ||
2045 !i915_sched_engine_is_empty(sched_engine) ||
2046 !ctx_id_mapped(guc, ce->guc_id.id);
/*
 * Submit a request under sched_engine->lock (irqsave). The request is
 * queued for the tasklet when need_tasklet() says so. Otherwise it is
 * submitted directly, and the tasklet is scheduled if that returns -EBUSY.
 */
2049 static void guc_submit_request(struct i915_request *rq)
2051 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2052 struct intel_guc *guc = &rq->engine->gt->uc.guc;
2053 unsigned long flags;
2055 /* Will be called from irq-context when using foreign fences. */
2056 spin_lock_irqsave(&sched_engine->lock, flags);
2058 if (need_tasklet(guc, rq))
2059 queue_request(sched_engine, rq, rq_prio(rq));
2060 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
2061 tasklet_hi_schedule(&sched_engine->tasklet);
2063 spin_unlock_irqrestore(&sched_engine->lock, flags);
/*
 * Allocate a fresh guc_id for a context (never a child; asserted).
 *
 * Parent contexts take a contiguous region from guc_ids_bitmap, which holds
 * NUMBER_MULTI_LRC_GUC_ID(guc) ids. Other contexts take an id from the ida,
 * in the range starting at NUMBER_MULTI_LRC_GUC_ID(guc) and bounded by
 * num_guc_ids. For non-parent contexts guc_ids_in_use is incremented. The
 * id is stored in ce->guc_id.id.
 *
 * NOTE(review): the 'else', the tail of two argument lists and the return
 * statements are not visible in this listing - confirm.
 */
2066 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
2070 GEM_BUG_ON(intel_context_is_child(ce));
2072 if (intel_context_is_parent(ce))
2073 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2074 NUMBER_MULTI_LRC_GUC_ID(guc),
2075 order_base_2(ce->parallel.number_children
2078 ret = ida_simple_get(&guc->submission_state.guc_ids,
2079 NUMBER_MULTI_LRC_GUC_ID(guc),
2080 guc->submission_state.num_guc_ids,
2081 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2083 if (unlikely(ret < 0))
2086 if (!intel_context_is_parent(ce))
2087 ++guc->submission_state.guc_ids_in_use;
2089 ce->guc_id.id = ret;
/*
 * Return a context's guc_id to its allocator (never a child; asserted).
 *
 * When the id is valid, a parent context releases its bitmap region. The
 * other path decrements guc_ids_in_use and removes the id from the ida.
 * The id mapping is then cleared and the id marked invalid. The context is
 * also unlinked from any guc_id list it is on.
 *
 * NOTE(review): the 'else', some arguments and the closing braces are not
 * visible in this listing, so exact nesting is to be confirmed.
 */
2093 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2095 GEM_BUG_ON(intel_context_is_child(ce));
2097 if (!context_guc_id_invalid(ce)) {
2098 if (intel_context_is_parent(ce)) {
2099 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2101 order_base_2(ce->parallel.number_children
2104 --guc->submission_state.guc_ids_in_use;
2105 ida_simple_remove(&guc->submission_state.guc_ids,
2108 clr_ctx_id_mapping(guc, ce->guc_id.id);
2109 set_context_guc_id_invalid(ce);
2111 if (!list_empty(&ce->guc_id.link))
2112 list_del_init(&ce->guc_id.link);
/* Locked wrapper: calls __release_guc_id() under guc->submission_state.lock (irqsave). */
2115 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2117 unsigned long flags;
2119 spin_lock_irqsave(&guc->submission_state.lock, flags);
2120 __release_guc_id(guc, ce);
2121 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
/*
 * Take over the guc_id of the first context on
 * guc->submission_state.guc_id_list.
 *
 * The caller must hold guc->submission_state.lock (checked with lockdep).
 * 'ce' must be neither a child nor a parent (asserted). The victim must
 * have no guc_id references, a valid id, and be neither child nor parent
 * (asserted). It is unlinked and its id copied to 'ce'. Its registered
 * state is cleared under its guc_state.lock and its id is marked invalid.
 * With CONFIG_DRM_I915_SELFTEST the steal counter is incremented.
 *
 * NOTE(review): the return statements (success, and the empty-list case)
 * are not visible in this listing - confirm.
 */
2124 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2126 struct intel_context *cn;
2128 lockdep_assert_held(&guc->submission_state.lock);
2129 GEM_BUG_ON(intel_context_is_child(ce));
2130 GEM_BUG_ON(intel_context_is_parent(ce));
2132 if (!list_empty(&guc->submission_state.guc_id_list)) {
2133 cn = list_first_entry(&guc->submission_state.guc_id_list,
2134 struct intel_context,
2137 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2138 GEM_BUG_ON(context_guc_id_invalid(cn));
2139 GEM_BUG_ON(intel_context_is_child(cn));
2140 GEM_BUG_ON(intel_context_is_parent(cn));
2142 list_del_init(&cn->guc_id.link);
2143 ce->guc_id.id = cn->guc_id.id;
2145 spin_lock(&cn->guc_state.lock);
2146 clr_context_registered(cn);
2147 spin_unlock(&cn->guc_state.lock);
2149 set_context_guc_id_invalid(cn);
2151 #ifdef CONFIG_DRM_I915_SELFTEST
2152 guc->number_guc_id_stolen++;
/*
 * Give the context a guc_id, allocating a new one or stealing one.
 *
 * The caller must hold guc->submission_state.lock (checked with lockdep).
 * 'ce' must not be a child (asserted). If new_guc_id() fails, a non-parent
 * context falls back to steal_guc_id(). For a parent, each child gets the
 * next consecutive id after the parent's.
 *
 * NOTE(review): the return statements, the parent-failure path and the
 * initial value of 'i' are not visible in this listing - confirm.
 */
2161 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2165 lockdep_assert_held(&guc->submission_state.lock);
2166 GEM_BUG_ON(intel_context_is_child(ce));
2168 ret = new_guc_id(guc, ce);
2169 if (unlikely(ret < 0)) {
2170 if (intel_context_is_parent(ce))
2173 ret = steal_guc_id(guc, ce);
2178 if (intel_context_is_parent(ce)) {
2179 struct intel_context *child;
2182 for_each_child(ce, child)
2183 child->guc_id.id = ce->guc_id.id + i++;
/*
 * Pin a guc_id for the context, assigning one first if it has none.
 *
 * Must be entered with no guc_id references held on 'ce' (asserted). Under
 * guc->submission_state.lock: an invalid id triggers assign_guc_id(), and
 * ret = 1 marks a newly assigned id. The context is removed from any
 * guc_id list and its reference count is incremented.
 *
 * On -EAGAIN the operation is retried while PIN_GUC_ID_TRIES allows. From
 * the second retry on, it first sleeps for the engine timeslice shifted
 * left by the retry number, clamped to the range 1..100 ms. Each retry
 * retires requests on the gt.
 *
 * NOTE(review): labels, gotos and the return statement are not visible in
 * this listing - confirm the retry flow against the full source.
 */
2189 #define PIN_GUC_ID_TRIES 4
2190 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2193 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2195 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2198 spin_lock_irqsave(&guc->submission_state.lock, flags);
2200 might_lock(&ce->guc_state.lock);
2202 if (context_guc_id_invalid(ce)) {
2203 ret = assign_guc_id(guc, ce);
2206 ret = 1; /* Indicates newly assigned guc_id */
2208 if (!list_empty(&ce->guc_id.link))
2209 list_del_init(&ce->guc_id.link);
2210 atomic_inc(&ce->guc_id.ref);
2213 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2216 * -EAGAIN indicates no guc_id are available, let's retire any
2217 * outstanding requests to see if that frees up a guc_id. If the first
2218 * retire didn't help, insert a sleep with the timeslice duration before
2219 * attempting to retire more requests. Double the sleep period each
2220 * subsequent pass before finally giving up. The sleep period has max of
2221 * 100ms and minimum of 1ms.
2223 if (ret == -EAGAIN && --tries) {
2224 if (PIN_GUC_ID_TRIES - tries > 1) {
2225 unsigned int timeslice_shifted =
2226 ce->engine->props.timeslice_duration_ms <<
2227 (PIN_GUC_ID_TRIES - tries - 2);
2228 unsigned int max = min_t(unsigned int, 100,
2231 msleep(max_t(unsigned int, max, 1));
2233 intel_gt_retire_requests(guc_to_gt(guc));
/*
 * Make the context's guc_id available for stealing once it is unreferenced.
 *
 * Under guc->submission_state.lock the context is appended to guc_id_list.
 * This happens only if its id is valid, it is not already on a list, and
 * its guc_id reference count is zero.
 *
 * NOTE(review): the statement taken for an invalid id or a parent context
 * is not visible in this listing; presumably an early return - confirm.
 */
2240 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2242 unsigned long flags;
2244 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2245 GEM_BUG_ON(intel_context_is_child(ce));
2247 if (unlikely(context_guc_id_invalid(ce) ||
2248 intel_context_is_parent(ce)))
2251 spin_lock_irqsave(&guc->submission_state.lock, flags);
2252 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2253 !atomic_read(&ce->guc_id.ref))
2254 list_add_tail(&ce->guc_id.link,
2255 &guc->submission_state.guc_id_list);
2256 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
/*
 * Build and send the v69 multi-LRC context registration action.
 *
 * The message holds the action id, guc_id, the context count
 * (number_children + 1) and the parent's descriptor offset. One further
 * offset follows per child, each advanced by sizeof(struct
 * guc_lrc_desc_v69). It is sent with guc_submission_send_busy_loop().
 *
 * NOTE(review): the remaining parameters (guc_id, offset, loop) and the
 * initial value of 'len' are not visible in this listing - confirm.
 */
2259 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2260 struct intel_context *ce,
2265 struct intel_context *child;
2266 u32 action[4 + MAX_ENGINE_INSTANCE];
2269 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2271 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2272 action[len++] = guc_id;
2273 action[len++] = ce->parallel.number_children + 1;
2274 action[len++] = offset;
2275 for_each_child(ce, child) {
2276 offset += sizeof(struct guc_lrc_desc_v69);
2277 action[len++] = offset;
2280 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
/*
 * Build and send the v70 multi-LRC context registration action.
 *
 * The message starts with 13 fixed dwords: the action id, fields taken
 * from 'info', and the context count (number_children + 1). Each child
 * then adds the low and high 32 bits of its LRCA. Child guc_ids are
 * asserted to follow info->context_idx consecutively, and the final length
 * is asserted to fit the action array.
 *
 * NOTE(review): the 'loop' parameter and the declarations of 'len' and
 * 'next_id' are not visible in this listing - confirm.
 */
2283 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2284 struct intel_context *ce,
2285 struct guc_ctxt_registration_info *info,
2288 struct intel_context *child;
2289 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2293 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2295 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2296 action[len++] = info->flags;
2297 action[len++] = info->context_idx;
2298 action[len++] = info->engine_class;
2299 action[len++] = info->engine_submit_mask;
2300 action[len++] = info->wq_desc_lo;
2301 action[len++] = info->wq_desc_hi;
2302 action[len++] = info->wq_base_lo;
2303 action[len++] = info->wq_base_hi;
2304 action[len++] = info->wq_size;
2305 action[len++] = ce->parallel.number_children + 1;
2306 action[len++] = info->hwlrca_lo;
2307 action[len++] = info->hwlrca_hi;
2309 next_id = info->context_idx + 1;
2310 for_each_child(ce, child) {
2311 GEM_BUG_ON(next_id++ != child->guc_id.id);
2314 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2315 * only supports 32 bit currently.
2317 action[len++] = lower_32_bits(child->lrc.lrca);
2318 action[len++] = upper_32_bits(child->lrc.lrca);
2321 GEM_BUG_ON(len > ARRAY_SIZE(action));
2323 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
/*
 * __guc_action_register_context_v69() - send a v69 single-context registration.
 *
 * Sends an INTEL_GUC_ACTION_REGISTER_CONTEXT action through
 * guc_submission_send_busy_loop() and returns its result.
 *
 * NOTE(review): the remaining parameters and action words are not visible in
 * this listing.
 */
2326 static int __guc_action_register_context_v69(struct intel_guc *guc,
2332 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2337 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
/*
 * __guc_action_register_context_v70() - send a v70 single-context registration.
 *
 * Sends an INTEL_GUC_ACTION_REGISTER_CONTEXT action built from @info (the
 * visible words include info->engine_submit_mask) through
 * guc_submission_send_busy_loop() and returns its result.
 *
 * NOTE(review): most of the action array is not visible in this listing.
 */
2341 static int __guc_action_register_context_v70(struct intel_guc *guc,
2342 struct guc_ctxt_registration_info *info,
2346 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2350 info->engine_submit_mask,
2360 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
/*
 * Forward declarations: both helpers are called by the register_context_v69()
 * and register_context_v70() functions below, ahead of their definitions.
 */
2364 static void prepare_context_registration_info_v69(struct intel_context *ce);
2365 static void prepare_context_registration_info_v70(struct intel_context *ce,
2366 struct guc_ctxt_registration_info *info);
/*
 * register_context_v69() - register @ce using the v69 descriptor pool.
 *
 * Computes the GGTT offset of this context's descriptor as the offset of
 * guc->lrc_desc_pool_v69 plus guc_id * sizeof(struct guc_lrc_desc_v69), fills
 * in the descriptor via prepare_context_registration_info_v69(), then sends
 * the multi-LRC action for a parent context or the single-context action
 * otherwise, returning that call's result.
 */
2369 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2371 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2372 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2374 prepare_context_registration_info_v69(ce);
2376 if (intel_context_is_parent(ce))
2377 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2380 return __guc_action_register_context_v69(guc, ce->guc_id.id,
/*
 * register_context_v70() - register @ce using the v70 registration info.
 *
 * Fills a stack-local struct guc_ctxt_registration_info via
 * prepare_context_registration_info_v70(), then sends the multi-LRC action
 * for a parent context or the single-context action otherwise, returning that
 * call's result.
 */
2385 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2387 struct guc_ctxt_registration_info info;
2389 prepare_context_registration_info_v70(ce, &info);
2391 if (intel_context_is_parent(ce))
2392 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2394 return __guc_action_register_context_v70(guc, &info, loop);
/*
 * register_context() - register @ce with the GuC.
 *
 * Must not be called on a child context (asserted). Uses the v70 path when
 * GUC_SUBMIT_VER(guc) >= 1.0.0; the other call visible is the v69 path.
 * Afterwards the context is marked registered under guc_state.lock and, on
 * the v70 interface, its policies are initialised with
 * guc_context_policy_init_v70().
 *
 * NOTE(review): the condition guarding the post-registration block and the
 * final return are not visible in this listing (presumably "on success" and
 * "return ret") - confirm.
 */
2397 static int register_context(struct intel_context *ce, bool loop)
2399 struct intel_guc *guc = ce_to_guc(ce);
2402 GEM_BUG_ON(intel_context_is_child(ce));
2403 trace_intel_context_register(ce);
2405 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
2406 ret = register_context_v70(guc, ce, loop);
2408 ret = register_context_v69(guc, ce, loop);
2411 unsigned long flags;
2413 spin_lock_irqsave(&ce->guc_state.lock, flags);
2414 set_context_registered(ce);
2415 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2417 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
2418 guc_context_policy_init_v70(ce, loop);
/*
 * __guc_action_deregister_context() - send a context deregistration request.
 *
 * Sends an INTEL_GUC_ACTION_DEREGISTER_CONTEXT action through
 * guc_submission_send_busy_loop(), passing G2H_LEN_DW_DEREGISTER_CONTEXT as
 * the argument after the action length, and returns the result.
 *
 * NOTE(review): the guc_id parameter and the rest of the action array are not
 * visible in this listing.
 */
2424 static int __guc_action_deregister_context(struct intel_guc *guc,
2428 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2432 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2433 G2H_LEN_DW_DEREGISTER_CONTEXT,
/*
 * deregister_context() - ask the GuC to deregister @guc_id.
 *
 * Must not be called on a child context (asserted). Emits the deregister
 * tracepoint for @ce and returns the result of
 * __guc_action_deregister_context(). The id is passed separately from @ce;
 * callers in this file pass ce->guc_id.id.
 */
2437 static int deregister_context(struct intel_context *ce, u32 guc_id)
2439 struct intel_guc *guc = ce_to_guc(ce);
2441 GEM_BUG_ON(intel_context_is_child(ce));
2442 trace_intel_context_deregister(ce);
2444 return __guc_action_deregister_context(guc, guc_id);
/*
 * clear_children_join_go_memory() - zero the parent scratch semaphores.
 *
 * Clears the "go" semaphore and the first number_children + 1 "join"
 * semaphores in the parent scratch area returned by __get_parent_scratch().
 */
2447 static inline void clear_children_join_go_memory(struct intel_context *ce)
2449 struct parent_scratch *ps = __get_parent_scratch(ce);
2452 ps->go.semaphore = 0;
2453 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2454 ps->join[i].semaphore = 0;
/* Return the current value of the "go" semaphore in @ce's parent scratch area. */
2457 static inline u32 get_children_go_value(struct intel_context *ce)
2459 return __get_parent_scratch(ce)->go.semaphore;
/*
 * Return the "join" semaphore value at index child_index in @ce's parent
 * scratch area.
 * NOTE(review): the child_index parameter declaration is not visible in this
 * listing.
 */
2462 static inline u32 get_children_join_value(struct intel_context *ce,
2465 return __get_parent_scratch(ce)->join[child_index].semaphore;
/*
 * struct context_policy - an update-context-policies H2G message under
 * construction.
 * @h2g: the message itself (header plus KLV entries).
 *
 * NOTE(review): the helpers below also use a "count" member tracking the
 * number of KLVs added; its declaration is not visible in this listing.
 */
2468 struct context_policy {
2470 struct guc_update_context_policy h2g;
/*
 * Return the length of the policy message in u32 words: the size of the
 * header plus policy->count KLV entries, divided by sizeof(u32).
 */
2473 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2475 size_t bytes = sizeof(policy->h2g.header) +
2476 (sizeof(policy->h2g.klv[0]) * policy->count);
2478 return bytes / sizeof(u32);
/*
 * Begin a policy update message for @guc_id: sets the header action to
 * INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES and the header ctx_id.
 * NOTE(review): any further initialisation (e.g. resetting the KLV count) is
 * not visible in this listing.
 */
2481 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2483 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2484 policy->h2g.header.ctx_id = guc_id;
/*
 * MAKE_CONTEXT_POLICY_ADD(func, id) generates
 * __guc_context_policy_add_<func>(policy, data), which asserts there is room
 * for another KLV and then writes one entry at index policy->count: key
 * GUC_CONTEXT_POLICIES_KLV_ID_<id>, length 1, value @data.
 * It is instantiated for execution quantum, preemption timeout, scheduling
 * priority and preempt-to-idle, then undefined.
 * NOTE(review): the line advancing policy->count is not visible in this
 * listing.
 */
2488 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2489 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2491 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2492 policy->h2g.klv[policy->count].kl = \
2493 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2494 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2495 policy->h2g.klv[policy->count].value = data; \
2499 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2500 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2501 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2502 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2504 #undef MAKE_CONTEXT_POLICY_ADD
/*
 * Send the assembled policy message: the h2g structure is passed as a u32
 * array whose length comes from __guc_context_policy_action_size(). Returns
 * the result of guc_submission_send_busy_loop().
 * NOTE(review): the loop parameter and trailing call arguments are not
 * visible in this listing.
 */
2506 static int __guc_context_set_context_policies(struct intel_guc *guc,
2507 struct context_policy *policy,
2510 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2511 __guc_context_policy_action_size(policy),
/*
 * guc_context_policy_init_v70() - send the initial scheduling policies for @ce.
 *
 * Derives the execution quantum and preemption timeout from the engine's
 * timeslice_duration_ms and preempt_timeout_ms properties, each multiplied by
 * 1000 (with overflow assertions), and builds a policy message carrying the
 * context priority, both of those values and, when the engine has
 * I915_ENGINE_WANT_FORCED_PREEMPTION set, preempt-to-idle = 1. After sending,
 * the context's policy_required flag is set or cleared under guc_state.lock.
 *
 * NOTE(review): the condition choosing between set and clear (presumably the
 * send result) and the return statement are not visible in this listing.
 */
2515 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2517 struct intel_engine_cs *engine = ce->engine;
2518 struct intel_guc *guc = &engine->gt->uc.guc;
2519 struct context_policy policy;
2520 u32 execution_quantum;
2521 u32 preemption_timeout;
2522 unsigned long flags;
2525 /* NB: For both of these, zero means disabled. */
2526 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2527 execution_quantum));
2528 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2529 preemption_timeout));
2530 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2531 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2533 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2535 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2536 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2537 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2539 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2540 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2542 ret = __guc_context_set_context_policies(guc, &policy, loop);
2544 spin_lock_irqsave(&ce->guc_state.lock, flags);
2546 set_context_policy_required(ce);
2548 clr_context_policy_required(ce);
2549 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
 * guc_context_policy_init_v69() - fill the policy fields of a v69 descriptor.
 *
 * Resets desc->policy_flags, sets CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69
 * when the engine has I915_ENGINE_WANT_FORCED_PREEMPTION, and stores the
 * engine's timeslice_duration_ms and preempt_timeout_ms, each multiplied by
 * 1000, into execution_quantum and preemption_timeout (with overflow
 * assertions against the destination field types).
 */
2554 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2555 struct guc_lrc_desc_v69 *desc)
2557 desc->policy_flags = 0;
2559 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2560 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2562 /* NB: For both of these, zero means disabled. */
2563 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2564 desc->execution_quantum));
2565 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2566 desc->preemption_timeout));
2567 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2568 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
/*
 * map_guc_prio_to_lrc_desc_prio() - translate a GuC client priority into a
 * GEN12_CTX_PRIORITY_* value.
 *
 * Visible mappings: KMD_NORMAL -> NORMAL, NORMAL -> LOW, HIGH and KMD_HIGH ->
 * HIGH.
 * NOTE(review): the switch header and any default case are not visible in
 * this listing.
 */
2571 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2574 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2575 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2581 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2582 return GEN12_CTX_PRIORITY_NORMAL;
2583 case GUC_CLIENT_PRIORITY_NORMAL:
2584 return GEN12_CTX_PRIORITY_LOW;
2585 case GUC_CLIENT_PRIORITY_HIGH:
2586 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2587 return GEN12_CTX_PRIORITY_HIGH;
/*
 * prepare_context_registration_info_v69() - populate the v69 LRC descriptor(s)
 * for @ce.
 *
 * Fills the descriptor selected by the context's guc_id with the engine
 * class, logical submit mask, LRCA, priority, the KMD registration flag and
 * the engine policies. For a parent context it additionally resets the cached
 * work queue indices, points the descriptor at the process descriptor and
 * work queue inside ce->state, initialises the process descriptor, caches
 * pointers to its head/tail/status fields, fills one descriptor per child
 * (using the parent's engine class and priority with the child's LRCA) and
 * clears the join/go semaphores.
 */
2591 static void prepare_context_registration_info_v69(struct intel_context *ce)
2593 struct intel_engine_cs *engine = ce->engine;
2594 struct intel_guc *guc = &engine->gt->uc.guc;
2595 u32 ctx_id = ce->guc_id.id;
2596 struct guc_lrc_desc_v69 *desc;
2597 struct intel_context *child;
2599 GEM_BUG_ON(!engine->mask);
2602 * Ensure LRC + CT vmas are is same region as write barrier is done
2603 * based on CT vma region.
2605 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2606 i915_gem_object_is_lmem(ce->ring->vma->obj));
2608 desc = __get_lrc_desc_v69(guc, ctx_id);
2610 desc->engine_class = engine_class_to_guc_class(engine->class);
2611 desc->engine_submit_mask = engine->logical_mask;
2612 desc->hw_context_desc = ce->lrc.lrca;
2613 desc->priority = ce->guc_state.prio;
2614 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2615 guc_context_policy_init_v69(engine, desc);
2618 * If context is a parent, we need to register a process descriptor
2619 * describing a work queue and register all child contexts.
2621 if (intel_context_is_parent(ce)) {
2622 struct guc_process_desc_v69 *pdesc;
2624 ce->parallel.guc.wqi_tail = 0;
2625 ce->parallel.guc.wqi_head = 0;
2627 desc->process_desc = i915_ggtt_offset(ce->state) +
2628 __get_parent_scratch_offset(ce);
2629 desc->wq_addr = i915_ggtt_offset(ce->state) +
2630 __get_wq_offset(ce);
2631 desc->wq_size = WQ_SIZE;
2633 pdesc = __get_process_desc_v69(ce);
2634 memset(pdesc, 0, sizeof(*(pdesc)));
2635 pdesc->stage_id = ce->guc_id.id;
2636 pdesc->wq_base_addr = desc->wq_addr;
2637 pdesc->wq_size_bytes = desc->wq_size;
2638 pdesc->wq_status = WQ_STATUS_ACTIVE;
2640 ce->parallel.guc.wq_head = &pdesc->head;
2641 ce->parallel.guc.wq_tail = &pdesc->tail;
2642 ce->parallel.guc.wq_status = &pdesc->wq_status;
/* From here on desc is re-pointed at each child's own descriptor. */
2644 for_each_child(ce, child) {
2645 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2647 desc->engine_class =
2648 engine_class_to_guc_class(engine->class);
2649 desc->hw_context_desc = child->lrc.lrca;
2650 desc->priority = ce->guc_state.prio;
2651 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2652 guc_context_policy_init_v69(engine, desc);
2655 clear_children_join_go_memory(ce);
/*
 * prepare_context_registration_info_v70() - populate @info for registering
 * @ce over the v70 interface.
 *
 * Zeroes @info, then records the context index (guc_id), GuC engine class,
 * logical submit mask, the split 64-bit LRCA and the KMD registration flag.
 * On engines with I915_ENGINE_HAS_EU_PRIORITY the mapped context priority is
 * OR-ed into hwlrca_lo. For a parent context it additionally resets the
 * cached work queue indices, records the GGTT addresses of the work queue
 * descriptor and base inside ce->state plus the queue size, initialises the
 * work queue descriptor, caches pointers to its head/tail/status fields and
 * clears the join/go semaphores.
 */
2659 static void prepare_context_registration_info_v70(struct intel_context *ce,
2660 struct guc_ctxt_registration_info *info)
2662 struct intel_engine_cs *engine = ce->engine;
2663 struct intel_guc *guc = &engine->gt->uc.guc;
2664 u32 ctx_id = ce->guc_id.id;
2666 GEM_BUG_ON(!engine->mask);
2669 * Ensure LRC + CT vmas are is same region as write barrier is done
2670 * based on CT vma region.
2672 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2673 i915_gem_object_is_lmem(ce->ring->vma->obj));
2675 memset(info, 0, sizeof(*info));
2676 info->context_idx = ctx_id;
2677 info->engine_class = engine_class_to_guc_class(engine->class);
2678 info->engine_submit_mask = engine->logical_mask;
2680 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2681 * only supports 32 bit currently.
2683 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2684 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2685 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2686 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2687 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2690 * If context is a parent, we need to register a process descriptor
2691 * describing a work queue and register all child contexts.
2693 if (intel_context_is_parent(ce)) {
2694 struct guc_sched_wq_desc *wq_desc;
2695 u64 wq_desc_offset, wq_base_offset;
2697 ce->parallel.guc.wqi_tail = 0;
2698 ce->parallel.guc.wqi_head = 0;
2700 wq_desc_offset = i915_ggtt_offset(ce->state) +
2701 __get_parent_scratch_offset(ce);
2702 wq_base_offset = i915_ggtt_offset(ce->state) +
2703 __get_wq_offset(ce);
2704 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2705 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2706 info->wq_base_lo = lower_32_bits(wq_base_offset);
2707 info->wq_base_hi = upper_32_bits(wq_base_offset);
2708 info->wq_size = WQ_SIZE;
2710 wq_desc = __get_wq_desc_v70(ce);
2711 memset(wq_desc, 0, sizeof(*wq_desc));
2712 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2714 ce->parallel.guc.wq_head = &wq_desc->head;
2715 ce->parallel.guc.wq_tail = &wq_desc->tail;
2716 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2718 clear_children_join_go_memory(ce);
/*
 * try_context_registration() - (re)register @ce under its current guc_id.
 *
 * Records whether the guc_id was already mapped, then replaces the mapping
 * with @ce. If it was mapped, the id must first be deregistered: under
 * guc_state.lock the context is flagged wait-for-deregister-to-register and
 * a reference is taken, unless submission is disabled, in which case the
 * mapping is cleared and 0 is returned. The deregister H2G is then sent with
 * a runtime-PM wakeref held. The other visible path calls register_context()
 * directly. In both paths -ENODEV is converted to 0; on the register path
 * -EBUSY and -ENODEV also clear the mapping.
 *
 * NOTE(review): the else separating the two paths, the declarations of ret
 * and disabled, and the final return are not visible in this listing.
 */
2722 static int try_context_registration(struct intel_context *ce, bool loop)
2724 struct intel_engine_cs *engine = ce->engine;
2725 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2726 struct intel_guc *guc = &engine->gt->uc.guc;
2727 intel_wakeref_t wakeref;
2728 u32 ctx_id = ce->guc_id.id;
2729 bool context_registered;
2732 GEM_BUG_ON(!sched_state_is_init(ce));
2734 context_registered = ctx_id_mapped(guc, ctx_id);
2736 clr_ctx_id_mapping(guc, ctx_id);
2737 set_ctx_id_mapping(guc, ctx_id, ce);
2740 * The context_lookup xarray is used to determine if the hardware
2741 * context is currently registered. There are two cases in which it
2742 * could be registered either the guc_id has been stolen from another
2743 * context or the lrc descriptor address of this context has changed. In
2744 * either case the context needs to be deregistered with the GuC before
2745 * registering this context.
2747 if (context_registered) {
2749 unsigned long flags;
2751 trace_intel_context_steal_guc_id(ce);
2754 /* Seal race with Reset */
2755 spin_lock_irqsave(&ce->guc_state.lock, flags);
2756 disabled = submission_disabled(guc);
2757 if (likely(!disabled)) {
2758 set_context_wait_for_deregister_to_register(ce);
2759 intel_context_get(ce);
2761 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2762 if (unlikely(disabled)) {
2763 clr_ctx_id_mapping(guc, ctx_id);
2764 return 0; /* Will get registered later */
2768 * If stealing the guc_id, this ce has the same guc_id as the
2769 * context whose guc_id was stolen.
2771 with_intel_runtime_pm(runtime_pm, wakeref)
2772 ret = deregister_context(ce, ce->guc_id.id);
2773 if (unlikely(ret == -ENODEV))
2774 ret = 0; /* Will get registered later */
2776 with_intel_runtime_pm(runtime_pm, wakeref)
2777 ret = register_context(ce, loop);
2778 if (unlikely(ret == -EBUSY)) {
2779 clr_ctx_id_mapping(guc, ctx_id);
2780 } else if (unlikely(ret == -ENODEV)) {
2781 clr_ctx_id_mapping(guc, ctx_id);
2782 ret = 0; /* Will get registered later */
/*
 * Pre-pin step for a GuC context: forwards to lrc_pre_pin() and returns its
 * result.
 * NOTE(review): the vaddr parameter declaration is not visible in this
 * listing.
 */
2789 static int __guc_context_pre_pin(struct intel_context *ce,
2790 struct intel_engine_cs *engine,
2791 struct i915_gem_ww_ctx *ww,
2794 return lrc_pre_pin(ce, engine, ww, vaddr);
/*
 * Pin step for a GuC context. If the GGTT offset of ce->state no longer
 * matches the address bits of the cached LRCA, CONTEXT_LRCA_DIRTY is set in
 * ce->flags. Then forwards to lrc_pin() and returns its result.
 * NOTE(review): the vaddr parameter declaration is not visible in this
 * listing.
 */
2797 static int __guc_context_pin(struct intel_context *ce,
2798 struct intel_engine_cs *engine,
2801 if (i915_ggtt_offset(ce->state) !=
2802 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2803 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2806 * GuC context gets pinned in guc_request_alloc. See that function for
2807 * explaination of why.
2810 return lrc_pin(ce, engine, vaddr);
/*
 * Pre-pin hook: calls __guc_context_pre_pin() with the context's own engine
 * and returns its result.
 * NOTE(review): the vaddr parameter declaration is not visible in this
 * listing.
 */
2813 static int guc_context_pre_pin(struct intel_context *ce,
2814 struct i915_gem_ww_ctx *ww,
2817 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
/*
 * Pin hook: pins via __guc_context_pin() and, when that succeeds and @ce is
 * not a barrier context, takes an engine PM reference.
 * NOTE(review): the return statement is not visible in this listing
 * (presumably returns ret).
 */
2820 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2822 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2824 if (likely(!ret && !intel_context_is_barrier(ce)))
2825 intel_engine_pm_get(ce->engine);
/*
 * Unpin hook: updates the context statistics, unpins the guc_id and, for
 * non-barrier contexts, drops the engine PM reference asynchronously.
 * NOTE(review): one line between unpin_guc_id() and the PM put is not
 * visible in this listing.
 */
2830 static void guc_context_unpin(struct intel_context *ce)
2832 struct intel_guc *guc = ce_to_guc(ce);
2834 __guc_context_update_stats(ce);
2835 unpin_guc_id(guc, ce);
2838 if (likely(!intel_context_is_barrier(ce)))
2839 intel_engine_pm_put_async(ce->engine);
/*
 * Post-unpin hook for GuC contexts.
 * NOTE(review): the function body is not visible in this listing.
 */
2842 static void guc_context_post_unpin(struct intel_context *ce)
/*
 * Send an INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET action for @ce (the enable
 * direction, per the tracepoint) via guc_submission_send_busy_loop() with
 * loop = true. The send result is not checked.
 * NOTE(review): the remaining action words are not visible in this listing.
 */
2847 static void __guc_context_sched_enable(struct intel_guc *guc,
2848 struct intel_context *ce)
2851 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2856 trace_intel_context_sched_enable(ce);
2858 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2859 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
/*
 * Send an INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET action that disables
 * scheduling for @guc_id. The id is passed explicitly because ce->guc_id.id
 * is not stable at this point (see the inline note). Asserts the id is valid
 * and @ce is not a child. The send result is not checked.
 * NOTE(review): the guc_id parameter declaration and the remaining action
 * words are not visible in this listing.
 */
2862 static void __guc_context_sched_disable(struct intel_guc *guc,
2863 struct intel_context *ce,
2867 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2868 guc_id, /* ce->guc_id.id not stable */
2872 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2874 GEM_BUG_ON(intel_context_is_child(ce));
2875 trace_intel_context_sched_disable(ce);
2877 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2878 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
/*
 * Complete the context's "blocked" fence if it is not already done.
 * Caller must hold ce->guc_state.lock.
 */
2881 static void guc_blocked_fence_complete(struct intel_context *ce)
2883 lockdep_assert_held(&ce->guc_state.lock);
2885 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2886 i915_sw_fence_complete(&ce->guc_state.blocked);
/*
 * Re-arm the context's "blocked" fence: the fence must currently be done
 * (asserted); it is finalised, reinitialised, given one pending await and
 * committed. Caller must hold ce->guc_state.lock.
 */
2889 static void guc_blocked_fence_reinit(struct intel_context *ce)
2891 lockdep_assert_held(&ce->guc_state.lock);
2892 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2895 * This fence is always complete unless a pending schedule disable is
2896 * outstanding. We arm the fence here and complete it when we receive
2897 * the pending schedule disable complete message.
2899 i915_sw_fence_fini(&ce->guc_state.blocked);
2900 i915_sw_fence_reinit(&ce->guc_state.blocked);
2901 i915_sw_fence_await(&ce->guc_state.blocked);
2902 i915_sw_fence_commit(&ce->guc_state.blocked);
/*
 * Prepare @ce for a schedule-disable request: marks it pending-disable,
 * clears the enabled state, re-arms the blocked fence and takes a context
 * reference. Returns the guc_id sampled under the lock, for the caller to use
 * in the H2G. Caller must hold ce->guc_state.lock.
 */
2905 static u16 prep_context_pending_disable(struct intel_context *ce)
2907 lockdep_assert_held(&ce->guc_state.lock);
2909 set_context_pending_disable(ce);
2910 clr_context_enabled(ce);
2911 guc_blocked_fence_reinit(ce);
2912 intel_context_get(ce);
2914 return ce->guc_id.id;
/*
 * guc_context_block() - block scheduling of @ce and return the fence to wait on.
 *
 * Under guc_state.lock the blocked count is incremented. If the context is
 * not enabled or submission is disabled, the enabled state is cleared and the
 * blocked fence is returned without sending anything. Otherwise the pin count
 * is raised by 2 (balanced by the schedule-disable completion handler, per
 * the inline note), the pending disable is prepared and, after dropping the
 * lock, the schedule-disable H2G is sent with a runtime-PM wakeref held.
 * Always returns &ce->guc_state.blocked.
 *
 * NOTE(review): one line inside the early-out branch and the declarations of
 * guc_id and enabled are not visible in this listing.
 */
2917 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2919 struct intel_guc *guc = ce_to_guc(ce);
2920 unsigned long flags;
2921 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2922 intel_wakeref_t wakeref;
2926 GEM_BUG_ON(intel_context_is_child(ce));
2928 spin_lock_irqsave(&ce->guc_state.lock, flags);
2930 incr_context_blocked(ce);
2932 enabled = context_enabled(ce);
2933 if (unlikely(!enabled || submission_disabled(guc))) {
2935 clr_context_enabled(ce);
2936 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2937 return &ce->guc_state.blocked;
2941 * We add +2 here as the schedule disable complete CTB handler calls
2942 * intel_context_sched_disable_unpin (-2 to pin_count).
2944 atomic_add(2, &ce->pin_count);
2946 guc_id = prep_context_pending_disable(ce);
2948 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2950 with_intel_runtime_pm(runtime_pm, wakeref)
2951 __guc_context_sched_disable(guc, ce, guc_id);
2953 return &ce->guc_state.blocked;
/*
 * SCHED_STATE_MULTI_BLOCKED_MASK: the blocked mask with the
 * SCHED_STATE_BLOCKED bit removed.
 * SCHED_STATE_NO_UNBLOCK: sched_state bits tested by context_cant_unblock();
 * if any is set, scheduling must not be re-enabled.
 * NOTE(review): the last term of SCHED_STATE_NO_UNBLOCK is not visible in
 * this listing.
 */
2956 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2957 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2958 #define SCHED_STATE_NO_UNBLOCK \
2959 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2960 SCHED_STATE_PENDING_DISABLE | \
/*
 * Return true when @ce must not have scheduling re-enabled: any
 * SCHED_STATE_NO_UNBLOCK bit is set, the guc_id is invalid, the guc_id is not
 * mapped, or the context is not pinned. Caller must hold ce->guc_state.lock.
 */
2963 static bool context_cant_unblock(struct intel_context *ce)
2965 lockdep_assert_held(&ce->guc_state.lock);
2967 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2968 context_guc_id_invalid(ce) ||
2969 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2970 !intel_context_is_pinned(ce);
/*
 * guc_context_unblock() - undo one guc_context_block() on @ce.
 *
 * The context must not be enabled and must not be a child (asserted). Under
 * guc_state.lock, the submission-disabled / context_cant_unblock() check is
 * evaluated; the visible enable path marks the context pending-enable and
 * enabled and takes a reference. The blocked count is decremented, the lock
 * is dropped and the schedule-enable H2G is sent with a runtime-PM wakeref
 * held.
 *
 * NOTE(review): the body of the "can't unblock" branch, the else and the
 * condition guarding the final send (presumably a local enable flag) are not
 * visible in this listing.
 */
2973 static void guc_context_unblock(struct intel_context *ce)
2975 struct intel_guc *guc = ce_to_guc(ce);
2976 unsigned long flags;
2977 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2978 intel_wakeref_t wakeref;
2981 GEM_BUG_ON(context_enabled(ce));
2982 GEM_BUG_ON(intel_context_is_child(ce));
2984 spin_lock_irqsave(&ce->guc_state.lock, flags);
2986 if (unlikely(submission_disabled(guc) ||
2987 context_cant_unblock(ce))) {
2991 set_context_pending_enable(ce);
2992 set_context_enabled(ce);
2993 intel_context_get(ce);
2996 decr_context_blocked(ce);
2998 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3001 with_intel_runtime_pm(runtime_pm, wakeref)
3002 __guc_context_sched_enable(guc, ce);
/*
 * guc_context_cancel_request() - cancel @rq on @ce.
 *
 * Only acts when the request's submit fence has signaled. It then holds a
 * reference on @ce, blocks the request's scheduling context and waits for the
 * returned fence. If the request has not completed, it is skipped and the
 * context state is reset from the request's wrapped head. Finally the
 * scheduling context is unblocked and the reference dropped.
 *
 * NOTE(review): the trailing argument of guc_reset_state() is not visible in
 * this listing.
 */
3006 static void guc_context_cancel_request(struct intel_context *ce,
3007 struct i915_request *rq)
3009 struct intel_context *block_context =
3010 request_to_scheduling_context(rq);
3012 if (i915_sw_fence_signaled(&rq->submit)) {
3013 struct i915_sw_fence *fence;
3015 intel_context_get(ce);
3016 fence = guc_context_block(block_context);
3017 i915_sw_fence_wait(fence);
3018 if (!i915_request_completed(rq)) {
3019 __i915_request_skip(rq);
3020 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
3024 guc_context_unblock(block_context);
3025 intel_context_put(ce);
/*
 * Update the preemption timeout of the context identified by guc_id.
 * With GUC_SUBMIT_VER(guc) >= 1.0.0 a single-KLV policy update is sent; the
 * other visible path sends INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT.
 * Send results are not checked.
 *
 * NOTE(review): the v69 path calls intel_guc_send_busy_loop() whereas the
 * sibling helpers in this file use guc_submission_send_busy_loop() - confirm
 * this difference is intentional.
 * NOTE(review): the guc_id parameter declaration and the v69 action words are
 * not visible in this listing.
 */
3029 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
3031 u32 preemption_timeout)
3033 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
3034 struct context_policy policy;
3036 __guc_context_policy_start_klv(&policy, guc_id);
3037 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
3038 __guc_context_set_context_policies(guc, &policy, true);
3041 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
3046 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
/*
 * guc_context_revoke() - ban @ce and get it off the hardware.
 *
 * After flushing submissions, marks the context banned under guc_state.lock
 * and then takes one of three visible paths:
 *  - submission disabled, or context neither enabled nor pending disable:
 *    drop the lock, cancel the context's requests and signal breadcrumbs;
 *  - no disable pending yet: raise the pin count by 2, prepare the pending
 *    disable, drop the lock, then set the preemption timeout and send the
 *    schedule-disable H2G with a runtime-PM wakeref held;
 *  - otherwise: if the guc_id is valid, only update the preemption timeout,
 *    then drop the lock.
 *
 * NOTE(review): the existing comment below mentions a minimum timeout of
 * "1 us", but the visible code forwards @preempt_timeout_ms to
 * __guc_context_set_preemption_timeout() - confirm which is intended.
 * NOTE(review): @rq is not used in any visible line; the return type line and
 * some branch delimiters are not visible in this listing.
 */
3051 guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
3052 unsigned int preempt_timeout_ms)
3054 struct intel_guc *guc = ce_to_guc(ce);
3055 struct intel_runtime_pm *runtime_pm =
3056 &ce->engine->gt->i915->runtime_pm;
3057 intel_wakeref_t wakeref;
3058 unsigned long flags;
3060 GEM_BUG_ON(intel_context_is_child(ce));
3062 guc_flush_submissions(guc);
3064 spin_lock_irqsave(&ce->guc_state.lock, flags);
3065 set_context_banned(ce);
3067 if (submission_disabled(guc) ||
3068 (!context_enabled(ce) && !context_pending_disable(ce))) {
3069 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3071 guc_cancel_context_requests(ce);
3072 intel_engine_signal_breadcrumbs(ce->engine);
3073 } else if (!context_pending_disable(ce)) {
3077 * We add +2 here as the schedule disable complete CTB handler
3078 * calls intel_context_sched_disable_unpin (-2 to pin_count).
3080 atomic_add(2, &ce->pin_count);
3082 guc_id = prep_context_pending_disable(ce);
3083 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3086 * In addition to disabling scheduling, set the preemption
3087 * timeout to the minimum value (1 us) so the banned context
3088 * gets kicked off the HW ASAP.
3090 with_intel_runtime_pm(runtime_pm, wakeref) {
3091 __guc_context_set_preemption_timeout(guc, guc_id,
3092 preempt_timeout_ms);
3093 __guc_context_sched_disable(guc, ce, guc_id);
3096 if (!context_guc_id_invalid(ce))
3097 with_intel_runtime_pm(runtime_pm, wakeref)
3098 __guc_context_set_preemption_timeout(guc,
3100 preempt_timeout_ms);
3101 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
 * do_sched_disable() - issue a schedule disable for @ce.
 *
 * Called with ce->guc_state.lock held and releases it (see __releases):
 * prepares the pending disable under the lock, restores IRQ state from
 * @flags while unlocking, then sends the schedule-disable H2G with a
 * runtime-PM wakeref held.
 */
3105 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
3106 unsigned long flags)
3107 __releases(ce->guc_state.lock)
3109 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3110 intel_wakeref_t wakeref;
3113 lockdep_assert_held(&ce->guc_state.lock);
3114 guc_id = prep_context_pending_disable(ce);
3116 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3118 with_intel_runtime_pm(runtime_pm, wakeref)
3119 __guc_context_sched_disable(guc, ce, guc_id);
/*
 * Decide whether the schedule-disable H2G can be skipped for @ce.
 * If submission is disabled, the guc_id is invalid or the guc_id is not
 * mapped, the enabled state is cleared. The visible final return reports
 * whether the context is not enabled. Caller must hold ce->guc_state.lock.
 * NOTE(review): the statement ending the first branch (presumably
 * "return true") is not visible in this listing.
 */
3122 static bool bypass_sched_disable(struct intel_guc *guc,
3123 struct intel_context *ce)
3125 lockdep_assert_held(&ce->guc_state.lock);
3126 GEM_BUG_ON(intel_context_is_child(ce));
3128 if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
3129 !ctx_id_mapped(guc, ce->guc_id.id)) {
3130 clr_context_enabled(ce);
3134 return !context_enabled(ce);
/*
 * Delayed-work handler for guc_state.sched_disable_delay_work.
 * Recovers the context from the work item, takes guc_state.lock and either
 * bypasses the disable (unlock, then intel_context_sched_disable_unpin()) or
 * calls do_sched_disable(), which releases the lock itself.
 */
3137 static void __delay_sched_disable(struct work_struct *wrk)
3139 struct intel_context *ce =
3140 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
3141 struct intel_guc *guc = ce_to_guc(ce);
3142 unsigned long flags;
3144 spin_lock_irqsave(&ce->guc_state.lock, flags);
3146 if (bypass_sched_disable(guc, ce)) {
3147 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3148 intel_context_sched_disable_unpin(ce);
3150 do_sched_disable(guc, ce, flags);
/*
 * Report whether schedule disable should be issued immediately instead of
 * being delayed: the visible final test is true when guc_ids_in_use exceeds
 * sched_disable_gucid_threshold.
 * NOTE(review): the value returned for parent contexts is not visible in this
 * listing (the comment implies true).
 */
3154 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
3157 * parent contexts are perma-pinned, if we are unpinning do schedule
3158 * disable immediately.
3160 if (intel_context_is_parent(ce))
3164 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately.
3166 return guc->submission_state.guc_ids_in_use >
3167 guc->submission_state.sched_disable_gucid_threshold;
/*
 * guc_context_sched_disable() - sched_disable hook for GuC contexts.
 *
 * Under guc_state.lock, picks one of three paths:
 *  - bypass: unlock and call intel_context_sched_disable_unpin();
 *  - delay: for a context that is not closed and not under guc_id pressure,
 *    unlock and (re)arm sched_disable_delay_work on system_unbound_wq with
 *    the configured delay in milliseconds;
 *  - otherwise: do_sched_disable(), which releases the lock itself.
 * NOTE(review): the last term of the delay condition is not visible in this
 * listing (presumably a non-zero delay check).
 */
3170 static void guc_context_sched_disable(struct intel_context *ce)
3172 struct intel_guc *guc = ce_to_guc(ce);
3173 u64 delay = guc->submission_state.sched_disable_delay_ms;
3174 unsigned long flags;
3176 spin_lock_irqsave(&ce->guc_state.lock, flags);
3178 if (bypass_sched_disable(guc, ce)) {
3179 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3180 intel_context_sched_disable_unpin(ce);
3181 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
3183 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3184 mod_delayed_work(system_unbound_wq,
3185 &ce->guc_state.sched_disable_delay_work,
3186 msecs_to_jiffies(delay));
3188 do_sched_disable(guc, ce, flags);
/*
 * guc_context_close() - close hook for GuC contexts.
 *
 * If the context has CONTEXT_GUC_INIT set and a delayed schedule disable was
 * successfully cancelled, the handler is run synchronously instead. The
 * context is then marked close-done under guc_state.lock.
 */
3192 static void guc_context_close(struct intel_context *ce)
3194 unsigned long flags;
3196 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
3197 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
3198 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
3200 spin_lock_irqsave(&ce->guc_state.lock, flags);
3201 set_context_close_done(ce);
3202 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
 * guc_lrc_desc_unpin() - start deregistering a destroyed context.
 *
 * Requires the GT to be awake, the guc_id to be mapped to @ce and the context
 * to be disabled (all asserted). Under guc_state.lock, unless submission is
 * disabled, a GT PM reference is taken and the context is marked destroyed
 * and no longer registered. If submission is disabled the guc_id is released
 * and the context destroyed directly; the other visible path sends the
 * deregister H2G.
 * NOTE(review): the early return after the disabled path is not visible in
 * this listing - confirm deregister_context() is not reached in that case.
 */
3205 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3207 struct intel_guc *guc = ce_to_guc(ce);
3208 struct intel_gt *gt = guc_to_gt(guc);
3209 unsigned long flags;
3212 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3213 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3214 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3215 GEM_BUG_ON(context_enabled(ce));
3217 /* Seal race with Reset */
3218 spin_lock_irqsave(&ce->guc_state.lock, flags);
3219 disabled = submission_disabled(guc);
3220 if (likely(!disabled)) {
3221 __intel_gt_pm_get(gt);
3222 set_context_destroyed(ce);
3223 clr_context_registered(ce);
3225 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3226 if (unlikely(disabled)) {
3227 release_guc_id(guc, ce);
3228 __guc_context_destroy(ce);
3232 deregister_context(ce, ce->guc_id.id);
/*
 * __guc_context_destroy() - final teardown of a GuC context.
 *
 * Asserts that no in-flight priority counts remain, finalises the context
 * and, for a virtual engine, drops the engine's breadcrumbs reference if one
 * is set. One visible path ends with intel_context_free().
 * NOTE(review): lines between the assertion and intel_context_fini(), and in
 * the virtual-engine branch, are not visible in this listing.
 */
3235 static void __guc_context_destroy(struct intel_context *ce)
3237 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3238 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3239 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3240 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3243 intel_context_fini(ce);
3245 if (intel_engine_is_virtual(ce->engine)) {
3246 struct guc_virtual_engine *ve =
3247 container_of(ce, typeof(*ve), context);
3249 if (ve->base.breadcrumbs)
3250 intel_breadcrumbs_put(ve->base.breadcrumbs);
3254 intel_context_free(ce);
/*
 * guc_flush_destroyed_contexts() - free every context queued for destruction
 * without talking to the GuC.
 *
 * Asserts that submission is disabled or not yet initialised. Repeatedly pops
 * the first entry of submission_state.destroyed_contexts under
 * submission_state.lock, then releases its guc_id and destroys it outside
 * the lock.
 * NOTE(review): the NULL check on the popped entry and the loop exit are not
 * visible in this listing.
 */
3258 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3260 struct intel_context *ce;
3261 unsigned long flags;
3263 GEM_BUG_ON(!submission_disabled(guc) &&
3264 guc_submission_initialized(guc));
3266 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3267 spin_lock_irqsave(&guc->submission_state.lock, flags);
3268 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3269 struct intel_context,
3272 list_del_init(&ce->destroyed_link);
3273 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3278 release_guc_id(guc, ce);
3279 __guc_context_destroy(ce);
/*
 * deregister_destroyed_contexts() - deregister every context queued for
 * destruction.
 *
 * Repeatedly pops the first entry of submission_state.destroyed_contexts
 * under submission_state.lock and calls guc_lrc_desc_unpin() on it outside
 * the lock.
 * NOTE(review): the NULL check on the popped entry and the loop exit are not
 * visible in this listing.
 */
3283 static void deregister_destroyed_contexts(struct intel_guc *guc)
3285 struct intel_context *ce;
3286 unsigned long flags;
3288 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3289 spin_lock_irqsave(&guc->submission_state.lock, flags);
3290 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3291 struct intel_context,
3294 list_del_init(&ce->destroyed_link);
3295 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3300 guc_lrc_desc_unpin(ce);
/*
 * Work handler for submission_state.destroyed_worker: deregisters all queued
 * destroyed contexts while holding GT PM via with_intel_gt_pm().
 */
3304 static void destroyed_worker_func(struct work_struct *w)
3306 struct intel_guc *guc = container_of(w, struct intel_guc,
3307 submission_state.destroyed_worker);
3308 struct intel_gt *gt = guc_to_gt(guc);
3311 with_intel_gt_pm(gt, tmp)
3312 deregister_destroyed_contexts(guc);
/*
 * guc_context_destroy() - kref release callback for a GuC context.
 *
 * Under submission_state.lock, decides whether the context can be destroyed
 * immediately: submission is disabled, the guc_id is invalid, or the guc_id
 * is not mapped. If not, the context is removed from the guc_id list (when
 * linked) and appended to submission_state.destroyed_contexts. An immediate
 * destroy calls __guc_context_destroy(); the other visible path queues
 * destroyed_worker on system_unbound_wq to issue the deregister H2G.
 *
 * NOTE(review): the else ahead of __release_guc_id() and the return after the
 * immediate destroy are not visible in this listing.
 */
3315 static void guc_context_destroy(struct kref *kref)
3317 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3318 struct intel_guc *guc = ce_to_guc(ce);
3319 unsigned long flags;
3323 * If the guc_id is invalid this context has been stolen and we can free
3324 * it immediately. Also can be freed immediately if the context is not
3325 * registered with the GuC or the GuC is in the middle of a reset.
3327 spin_lock_irqsave(&guc->submission_state.lock, flags);
3328 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3329 !ctx_id_mapped(guc, ce->guc_id.id);
3330 if (likely(!destroy)) {
3331 if (!list_empty(&ce->guc_id.link))
3332 list_del_init(&ce->guc_id.link);
3333 list_add_tail(&ce->destroyed_link,
3334 &guc->submission_state.destroyed_contexts);
3336 __release_guc_id(guc, ce);
3338 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3339 if (unlikely(destroy)) {
3340 __guc_context_destroy(ce);
3345 * We use a worker to issue the H2G to deregister the context as we can
3346 * take the GT PM for the first time which isn't allowed from an atomic
3349 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
/* Alloc hook: allocates context state via lrc_alloc() on the context's engine. */
3352 static int guc_context_alloc(struct intel_context *ce)
3354 return lrc_alloc(ce, ce->engine);
/*
 * Push the context's current priority (ce->guc_state.prio) to the GuC.
 * With GUC_SUBMIT_VER(guc) >= 1.0.0 a single-KLV policy update is sent; the
 * other visible path sends INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY. Send
 * results are not checked.
 * NOTE(review): the v69 action words are not visible in this listing.
 */
3357 static void __guc_context_set_prio(struct intel_guc *guc,
3358 struct intel_context *ce)
3360 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
3361 struct context_policy policy;
3363 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3364 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3365 __guc_context_set_context_policies(guc, &policy, true);
3368 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3373 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
/*
 * guc_context_set_prio() - record a new GuC priority for @ce and, when
 * needed, tell the GuC.
 *
 * prio must lie between GUC_CLIENT_PRIORITY_KMD_HIGH and
 * GUC_CLIENT_PRIORITY_NORMAL (asserted). If the priority is unchanged,
 * submission is disabled or the context is not registered, only the cached
 * value is updated. The other visible path updates the cached value, sends it
 * with __guc_context_set_prio() and emits the set_prio tracepoint.
 * Caller must hold ce->guc_state.lock.
 * NOTE(review): the prio parameter declaration and the return ending the
 * first branch are not visible in this listing.
 */
3377 static void guc_context_set_prio(struct intel_guc *guc,
3378 struct intel_context *ce,
3381 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3382 prio > GUC_CLIENT_PRIORITY_NORMAL);
3383 lockdep_assert_held(&ce->guc_state.lock);
3385 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3386 !context_registered(ce)) {
3387 ce->guc_state.prio = prio;
3391 ce->guc_state.prio = prio;
3392 __guc_context_set_prio(guc, ce);
3394 trace_intel_context_set_prio(ce);
/*
 * Map an i915 scheduler priority to a GuC client priority:
 *   == I915_PRIORITY_NORMAL  -> GUC_CLIENT_PRIORITY_KMD_NORMAL
 *   <  I915_PRIORITY_NORMAL  -> GUC_CLIENT_PRIORITY_NORMAL
 *   <  I915_PRIORITY_DISPLAY -> GUC_CLIENT_PRIORITY_HIGH
 *   the remaining visible return -> GUC_CLIENT_PRIORITY_KMD_HIGH
 */
3397 static inline u8 map_i915_prio_to_guc_prio(int prio)
3399 if (prio == I915_PRIORITY_NORMAL)
3400 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3401 else if (prio < I915_PRIORITY_NORMAL)
3402 return GUC_CLIENT_PRIORITY_NORMAL;
3403 else if (prio < I915_PRIORITY_DISPLAY)
3404 return GUC_CLIENT_PRIORITY_HIGH;
3406 return GUC_CLIENT_PRIORITY_KMD_HIGH;
/*
 * Increment the in-flight request count for guc_prio on @ce, warning if the
 * counter wrapped to zero. Caller must hold ce->guc_state.lock.
 * NOTE(review): the guc_prio parameter declaration is not visible in this
 * listing.
 */
3409 static inline void add_context_inflight_prio(struct intel_context *ce,
3412 lockdep_assert_held(&ce->guc_state.lock);
3413 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3415 ++ce->guc_state.prio_count[guc_prio];
3417 /* Overflow protection */
3418 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
/*
 * Decrement the in-flight request count for guc_prio on @ce, warning first if
 * the counter is already zero. Caller must hold ce->guc_state.lock.
 * NOTE(review): the guc_prio parameter declaration is not visible in this
 * listing.
 */
3421 static inline void sub_context_inflight_prio(struct intel_context *ce,
3424 lockdep_assert_held(&ce->guc_state.lock);
3425 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3427 /* Underflow protection */
3428 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3430 --ce->guc_state.prio_count[guc_prio];
/*
 * Scan prio_count from index 0 upwards and apply an index with a non-zero
 * in-flight count as the context priority. The BUILD_BUG_ONs pin the
 * assumption that KMD_HIGH is 0 and not greater than NORMAL, so lower indices
 * are higher priorities. Caller must hold ce->guc_state.lock.
 * NOTE(review): the statement after guc_context_set_prio() (presumably a
 * break at the first match) is not visible in this listing.
 */
3433 static inline void update_context_prio(struct intel_context *ce)
3435 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3438 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3439 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3441 lockdep_assert_held(&ce->guc_state.lock);
3443 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3444 if (ce->guc_state.prio_count[i]) {
3445 guc_context_set_prio(guc, ce, i);
/* Return true when new_guc_prio outranks old_guc_prio (numerically smaller). */
3451 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3453 /* Lower value is higher priority */
3454 return new_guc_prio < old_guc_prio;
/*
 * add_to_context() - track @rq on its scheduling context.
 *
 * Under guc_state.lock (plain spin_lock), moves the request to the tail of
 * the context's request list. If the request has no GuC priority yet
 * (GUC_PRIO_INIT) it is assigned the mapped priority and counted in flight;
 * if the newly mapped priority is higher than the recorded one, the count is
 * moved to the new level. The context priority is then refreshed.
 * The request must not already be in the GUC_PRIO_FINI state (asserted).
 */
3457 static void add_to_context(struct i915_request *rq)
3459 struct intel_context *ce = request_to_scheduling_context(rq);
3460 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3462 GEM_BUG_ON(intel_context_is_child(ce));
3463 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3465 spin_lock(&ce->guc_state.lock);
3466 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3468 if (rq->guc_prio == GUC_PRIO_INIT) {
3469 rq->guc_prio = new_guc_prio;
3470 add_context_inflight_prio(ce, rq->guc_prio);
3471 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3472 sub_context_inflight_prio(ce, rq->guc_prio);
3473 rq->guc_prio = new_guc_prio;
3474 add_context_inflight_prio(ce, rq->guc_prio);
3476 update_context_prio(ce);
3478 spin_unlock(&ce->guc_state.lock);
/*
 * Finalise a request's priority accounting: if the request currently
 * contributes to a per-level counter (neither INIT nor FINI), remove that
 * contribution and re-derive the context priority, then mark the request
 * GUC_PRIO_FINI. Caller must hold ce->guc_state.lock.
 */
3481 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3483 lockdep_assert_held(&ce->guc_state.lock);
3485 if (rq->guc_prio != GUC_PRIO_INIT &&
3486 rq->guc_prio != GUC_PRIO_FINI) {
3487 sub_context_inflight_prio(ce, rq->guc_prio);
3488 update_context_prio(ce);
3490 rq->guc_prio = GUC_PRIO_FINI;
/*
 * Stop tracking a request on its scheduling context: unlink it, clear its
 * PQUEUE flag, mark it ACTIVE, and finalise its priority accounting under
 * ce->guc_state.lock (taken with interrupts disabled). Afterwards drop one
 * ce->guc_id.ref and run the request's execute callbacks. Installed as
 * engine->remove_active_request in guc_default_vfuncs().
 */
3493 static void remove_from_context(struct i915_request *rq)
3495 struct intel_context *ce = request_to_scheduling_context(rq);
3497 GEM_BUG_ON(intel_context_is_child(ce));
3499 spin_lock_irq(&ce->guc_state.lock);
3501 list_del_init(&rq->sched.link);
3502 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3504 /* Prevent further __await_execution() registering a cb, then flush */
3505 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3507 guc_prio_fini(rq, ce);
3509 spin_unlock_irq(&ce->guc_state.lock);
3511 atomic_dec(&ce->guc_id.ref);
3512 i915_request_notify_execute_cb_imm(rq);
/*
 * Default context operations under GuC submission; installed as
 * engine->cops in guc_default_vfuncs(). Also provides the constructors for
 * virtual and parallel contexts.
 */
3515 static const struct intel_context_ops guc_context_ops = {
3516 .flags = COPS_RUNTIME_CYCLES,
3517 .alloc = guc_context_alloc,
3519 .close = guc_context_close,
3521 .pre_pin = guc_context_pre_pin,
3522 .pin = guc_context_pin,
3523 .unpin = guc_context_unpin,
3524 .post_unpin = guc_context_post_unpin,
3526 .revoke = guc_context_revoke,
3528 .cancel_request = guc_context_cancel_request,
3530 .enter = intel_context_enter_engine,
3531 .exit = intel_context_exit_engine,
3533 .sched_disable = guc_context_sched_disable,
3535 .update_stats = guc_context_update_stats,
3538 .destroy = guc_context_destroy,
3540 .create_virtual = guc_create_virtual,
3541 .create_parallel = guc_create_parallel,
/*
 * irq_work callback armed by guc_request_alloc(): completes the request's
 * submit fence, releasing the hold taken there with i915_sw_fence_await().
 */
3544 static void submit_work_cb(struct irq_work *wrk)
3546 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
/* Lockdep annotation only: tells lockdep sched_engine->lock may be taken here. */
3548 might_lock(&rq->engine->sched_engine->lock);
3549 i915_sw_fence_complete(&rq->submit);
/*
 * Release every request parked on ce->guc_state.fences: unlink each one and
 * queue its submit_work irq_work, then reinitialise the list head. A trace
 * event is emitted only when there was something to release. Caller must
 * hold ce->guc_state.lock.
 */
3552 static void __guc_signal_context_fence(struct intel_context *ce)
3554 struct i915_request *rq, *rn;
3556 lockdep_assert_held(&ce->guc_state.lock);
3558 if (!list_empty(&ce->guc_state.fences))
3559 trace_intel_context_fence_release(ce);
3562 * Use an IRQ to ensure locking order of sched_engine->lock ->
3563 * ce->guc_state.lock is preserved.
3565 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3567 list_del(&rq->guc_fence_link);
3568 irq_work_queue(&rq->submit_work);
3571 INIT_LIST_HEAD(&ce->guc_state.fences);
/*
 * Locked wrapper around __guc_signal_context_fence(): under
 * ce->guc_state.lock (irqsave) clear the wait-for-deregister-to-register
 * state and release the requests parked on the context. Not valid for child
 * contexts.
 */
3574 static void guc_signal_context_fence(struct intel_context *ce)
3576 unsigned long flags;
3578 GEM_BUG_ON(intel_context_is_child(ce));
3580 spin_lock_irqsave(&ce->guc_state.lock, flags);
3581 clr_context_wait_for_deregister_to_register(ce);
3582 __guc_signal_context_fence(ce);
3583 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
 * A context needs (re)registration when it was just given a new guc_id, its
 * CONTEXT_LRCA_DIRTY flag is set, or its guc_id is not currently mapped --
 * but never while submission is disabled.
 */
3586 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3588 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3589 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3590 !submission_disabled(ce_to_guc(ce));
/*
 * GuC-side initialisation of a context: derive the initial GuC priority from
 * the owning GEM context's scheduler priority (starting from
 * I915_CONTEXT_DEFAULT_PRIORITY), set up the delayed sched-disable work and
 * flag the context with CONTEXT_GUC_INIT so callers do not repeat this.
 *
 * NOTE(review): the lines around the rcu_dereference() are not visible in
 * this extract; presumably it sits inside an RCU read-side section and the
 * priority is only read when a GEM context is attached -- confirm.
 */
3593 static void guc_context_init(struct intel_context *ce)
3595 const struct i915_gem_context *ctx;
3596 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3599 ctx = rcu_dereference(ce->gem_context);
3601 prio = ctx->sched.priority;
3604 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3606 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
3607 __delay_sched_disable);
3609 set_bit(CONTEXT_GUC_INIT, &ce->flags);
/*
 * Per-request setup under GuC submission; installed as engine->request_alloc
 * in guc_default_vfuncs(). In order, it:
 *  - temporarily reserves GUC_REQUEST_SIZE extra ring space around the
 *    invalidating flush;
 *  - lazily runs guc_context_init() on first use of the context;
 *  - cancels or waits out a pending delayed sched-disable / context close;
 *  - takes a guc_id reference for the request, pinning a guc_id and
 *    registering the context when required;
 *  - parks the request on ce->guc_state.fences while a deregister or
 *    schedule-disable G2H is still outstanding.
 */
3612 static int guc_request_alloc(struct i915_request *rq)
3614 struct intel_context *ce = request_to_scheduling_context(rq);
3615 struct intel_guc *guc = ce_to_guc(ce);
3616 unsigned long flags;
3619 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3622 * Flush enough space to reduce the likelihood of waiting after
3623 * we start building the request - in which case we will just
3624 * have to repeat work.
3626 rq->reserved_space += GUC_REQUEST_SIZE;
3629 * Note that after this point, we have committed to using
3630 * this request as it is being used to both track the
3631 * state of engine initialisation and liveness of the
3632 * golden renderstate above. Think twice before you try
3633 * to cancel/unwind this request now.
3636 /* Unconditionally invalidate GPU caches and TLBs. */
3637 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3641 rq->reserved_space -= GUC_REQUEST_SIZE;
3643 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3644 guc_context_init(ce);
3647 * If the context gets closed while the execbuf is ongoing, the context
3648 * close code will race with the below code to cancel the delayed work.
3649 * If the context close wins the race and cancels the work, it will
3650 * immediately call the sched disable (see guc_context_close), so there
3651 * is a chance we can get past this check while the sched_disable code
3652 * is being executed. To make sure that code completes before we check
3653 * the status further down, we wait for the close process to complete.
3654 * Else, this code path could send a request down thinking that the
3655 * context is still in a schedule-enable mode while the GuC ends up
3656 * dropping the request completely because the disable did go from the
3657 * context_close path right to GuC just prior. In the event the CT is
3658 * full, we could potentially need to wait up to 1.5 seconds.
3660 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3661 intel_context_sched_disable_unpin(ce);
3662 else if (intel_context_is_closed(ce))
3663 if (wait_for(context_close_done(ce), 1500))
3664 guc_warn(guc, "timed out waiting on context sched close before realloc\n");
3666 * Call pin_guc_id here rather than in the pinning step as with
3667 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
3668 * guc_id and creating horrible race conditions. This is especially bad
3669 * when guc_id are being stolen due to over subscription. By the time
3670 * this function is reached, it is guaranteed that the guc_id will be
3671 * persistent until the generated request is retired. Thus, sealing these
3672 * race conditions. It is still safe to fail here if guc_id are
3673 * exhausted and return -EAGAIN to the user indicating that they can try
3674 * again in the future.
3676 * There is no need for a lock here as the timeline mutex ensures at
3677 * most one context can be executing this code path at once. The
3678 * guc_id_ref is incremented once for every request in flight and
3679 * decremented on each retire. When it is zero, a lock around the
3680 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
/* Lockless bump, only possible while the reference count is already non-zero. */
3682 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3685 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3686 if (unlikely(ret < 0))
3688 if (context_needs_register(ce, !!ret)) {
3689 ret = try_context_registration(ce, true);
3690 if (unlikely(ret)) { /* unwind */
3691 if (ret == -EPIPE) {
3692 disable_submission(guc);
3693 goto out; /* GPU will be reset */
3695 atomic_dec(&ce->guc_id.ref);
3696 unpin_guc_id(guc, ce);
3701 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3705 * We block all requests on this context if a G2H is pending for a
3706 * schedule disable or context deregistration as the GuC will fail a
3707 * schedule enable or context registration if either G2H is pending
3708 * respectfully. Once a G2H returns, the fence is released that is
3709 * blocking these requests (see guc_signal_context_fence).
3711 spin_lock_irqsave(&ce->guc_state.lock, flags);
3712 if (context_wait_for_deregister_to_register(ce) ||
3713 context_pending_disable(ce)) {
3714 init_irq_work(&rq->submit_work, submit_work_cb);
3715 i915_sw_fence_await(&rq->submit);
3717 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3719 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
/*
 * Pre-pin step for a virtual context: delegates to __guc_context_pre_pin()
 * using sibling 0 of the virtual engine.
 */
3724 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3725 struct i915_gem_ww_ctx *ww,
3728 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3730 return __guc_context_pre_pin(ce, engine, ww, vaddr);
/*
 * Pin a virtual context through sibling 0, and take an engine-pm reference
 * on every engine in the virtual engine's mask (balanced by the
 * intel_engine_pm_put_async() loop in guc_virtual_context_unpin()).
 *
 * NOTE(review): the lines between the pin and the loop are not visible in
 * this extract; presumably the references are only taken when the pin
 * succeeded -- confirm.
 */
3733 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3735 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3736 int ret = __guc_context_pin(ce, engine, vaddr);
3737 intel_engine_mask_t tmp, mask = ce->engine->mask;
3740 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3741 intel_engine_pm_get(engine);
/*
 * Unpin a virtual context: the context must be neither enabled nor a
 * barrier context. Releases the guc_id pin and asynchronously drops the
 * engine-pm reference on every engine in the virtual engine's mask.
 */
3746 static void guc_virtual_context_unpin(struct intel_context *ce)
3748 intel_engine_mask_t tmp, mask = ce->engine->mask;
3749 struct intel_engine_cs *engine;
3750 struct intel_guc *guc = ce_to_guc(ce);
3752 GEM_BUG_ON(context_enabled(ce));
3753 GEM_BUG_ON(intel_context_is_barrier(ce));
3755 unpin_guc_id(guc, ce);
3758 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3759 intel_engine_pm_put_async(engine);
/*
 * Context-enter hook for virtual contexts: take an engine-pm reference on
 * every engine in the mask, then enter the context's timeline.
 */
3762 static void guc_virtual_context_enter(struct intel_context *ce)
3764 intel_engine_mask_t tmp, mask = ce->engine->mask;
3765 struct intel_engine_cs *engine;
3767 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3768 intel_engine_pm_get(engine);
3770 intel_timeline_enter(ce->timeline);
/*
 * Context-exit hook for virtual contexts, mirroring
 * guc_virtual_context_enter(): drop the engine-pm reference on every engine
 * in the mask, then exit the context's timeline.
 */
3773 static void guc_virtual_context_exit(struct intel_context *ce)
3775 intel_engine_mask_t tmp, mask = ce->engine->mask;
3776 struct intel_engine_cs *engine;
3778 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3779 intel_engine_pm_put(engine);
3781 intel_timeline_exit(ce->timeline);
/* Allocate the LRC state for a virtual context using sibling 0 of its engine. */
3784 static int guc_virtual_context_alloc(struct intel_context *ce)
3786 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3788 return lrc_alloc(ce, engine);
/*
 * Context operations for virtual-engine contexts: same close / revoke /
 * cancel / sched_disable / stats / destroy handlers as guc_context_ops, but
 * with the virtual-specific alloc, pin/unpin and enter/exit hooks, plus
 * .get_sibling.
 */
3791 static const struct intel_context_ops virtual_guc_context_ops = {
3792 .flags = COPS_RUNTIME_CYCLES,
3793 .alloc = guc_virtual_context_alloc,
3795 .close = guc_context_close,
3797 .pre_pin = guc_virtual_context_pre_pin,
3798 .pin = guc_virtual_context_pin,
3799 .unpin = guc_virtual_context_unpin,
3800 .post_unpin = guc_context_post_unpin,
3802 .revoke = guc_context_revoke,
3804 .cancel_request = guc_context_cancel_request,
3806 .enter = guc_virtual_context_enter,
3807 .exit = guc_virtual_context_exit,
3809 .sched_disable = guc_context_sched_disable,
3810 .update_stats = guc_context_update_stats,
3812 .destroy = guc_context_destroy,
3814 .get_sibling = guc_virtual_get_sibling,
/*
 * Pin the parent of a parallel (parent/child) context: the context must be
 * a parent on a virtual engine. Pins a guc_id first, then pins the context
 * through sibling 0.
 */
3817 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3819 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3820 struct intel_guc *guc = ce_to_guc(ce);
3823 GEM_BUG_ON(!intel_context_is_parent(ce));
3824 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3826 ret = pin_guc_id(guc, ce);
3827 if (unlikely(ret < 0))
3830 return __guc_context_pin(ce, engine, vaddr);
/*
 * Pin a child of a parallel context: takes an extra pin on the parent
 * (dropped again in guc_child_context_post_unpin()) and then pins the child
 * through sibling 0. No guc_id is pinned here.
 */
3833 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3835 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3837 GEM_BUG_ON(!intel_context_is_child(ce));
3838 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3840 __intel_context_pin(ce->parallel.parent);
3841 return __guc_context_pin(ce, engine, vaddr);
/*
 * Unpin the parent of a parallel context: asserts the context is a disabled,
 * non-barrier parent on a virtual engine, then releases its guc_id pin.
 */
3844 static void guc_parent_context_unpin(struct intel_context *ce)
3846 struct intel_guc *guc = ce_to_guc(ce);
3848 GEM_BUG_ON(context_enabled(ce));
3849 GEM_BUG_ON(intel_context_is_barrier(ce));
3850 GEM_BUG_ON(!intel_context_is_parent(ce));
3851 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3853 unpin_guc_id(guc, ce);
/*
 * Unpin a child of a parallel context: asserts the context is a disabled,
 * non-barrier child on a virtual engine. No guc_id is released here.
 */
3857 static void guc_child_context_unpin(struct intel_context *ce)
3859 GEM_BUG_ON(context_enabled(ce));
3860 GEM_BUG_ON(intel_context_is_barrier(ce));
3861 GEM_BUG_ON(!intel_context_is_child(ce));
3862 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
/*
 * Post-unpin step for a child context: drops the pin on the parent that
 * guc_child_context_pin() took. The parent must still be pinned on entry.
 */
3867 static void guc_child_context_post_unpin(struct intel_context *ce)
3869 GEM_BUG_ON(!intel_context_is_child(ce));
3870 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3871 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3874 intel_context_unpin(ce->parallel.parent);
/*
 * kref release callback for a child context: recovers the context from its
 * embedded ref and destroys it directly via __guc_context_destroy().
 */
3877 static void guc_child_context_destroy(struct kref *kref)
3879 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3881 __guc_context_destroy(ce);
/*
 * Context operations assigned to the parent of a parallel context in
 * guc_create_parallel(): parent-specific pin/unpin, otherwise the common GuC
 * handlers and the virtual enter/exit hooks.
 */
3884 static const struct intel_context_ops virtual_parent_context_ops = {
3885 .alloc = guc_virtual_context_alloc,
3887 .close = guc_context_close,
3889 .pre_pin = guc_context_pre_pin,
3890 .pin = guc_parent_context_pin,
3891 .unpin = guc_parent_context_unpin,
3892 .post_unpin = guc_context_post_unpin,
3894 .revoke = guc_context_revoke,
3896 .cancel_request = guc_context_cancel_request,
3898 .enter = guc_virtual_context_enter,
3899 .exit = guc_virtual_context_exit,
3901 .sched_disable = guc_context_sched_disable,
3903 .destroy = guc_context_destroy,
3905 .get_sibling = guc_virtual_get_sibling,
/*
 * Context operations assigned to each child of a parallel context in
 * guc_create_parallel(). Compared with the parent table there is no .close,
 * .revoke or .sched_disable entry, and pin / unpin / post_unpin / destroy
 * use the child-specific handlers.
 */
3908 static const struct intel_context_ops virtual_child_context_ops = {
3909 .alloc = guc_virtual_context_alloc,
3911 .pre_pin = guc_context_pre_pin,
3912 .pin = guc_child_context_pin,
3913 .unpin = guc_child_context_unpin,
3914 .post_unpin = guc_child_context_post_unpin,
3916 .cancel_request = guc_context_cancel_request,
3918 .enter = guc_virtual_context_enter,
3919 .exit = guc_virtual_context_exit,
3921 .destroy = guc_child_context_destroy,
3923 .get_sibling = guc_virtual_get_sibling,
3927 * The below override of the breadcrumbs is enabled when the user configures a
3928 * context for parallel submission (multi-lrc, parent-child).
3930 * The overridden breadcrumbs implement an algorithm which allows the GuC to
3931 * safely preempt all the hw contexts configured for parallel submission
3932 * between each BB. The contract between the i915 and GuC is if the parent
3933 * context can be preempted, all the children can be preempted, and the GuC will
3934 * always try to preempt the parent before the children. A handshake between the
3935 * parent / children breadcrumbs ensures the i915 holds up its end of the deal
3936 * creating a window to preempt between each set of BBs.
/*
 * Forward declarations of the parallel-submission emitters that
 * guc_create_parallel() installs on the parent's and children's engines.
 */
3938 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3939 u64 offset, u32 len,
3940 const unsigned int flags);
3941 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3942 u64 offset, u32 len,
3943 const unsigned int flags);
3945 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3948 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
/*
 * Build a parallel (parent/child) context set. engines[] is read as `width`
 * consecutive groups of num_siblings engines (engines[i * num_siblings + j]);
 * each group is turned into one virtual-engine context. One context gets
 * virtual_parent_context_ops, the others get virtual_child_context_ops and
 * are bound to the parent. The parent then receives a fresh fence context,
 * and the bb_start / fini-breadcrumb emitters of the parent's and every
 * child's engine are replaced by the no-preempt-mid-batch variants.
 *
 * Returns ERR_PTR(-ENOMEM) when the temporary siblings array cannot be
 * allocated.
 *
 * NOTE(review): how the parent is chosen and the remaining error paths are
 * not visible in this extract.
 */
3951 static struct intel_context *
3952 guc_create_parallel(struct intel_engine_cs **engines,
3953 unsigned int num_siblings,
3956 struct intel_engine_cs **siblings = NULL;
3957 struct intel_context *parent = NULL, *ce, *err;
3960 siblings = kmalloc_array(num_siblings,
3964 return ERR_PTR(-ENOMEM);
3966 for (i = 0; i < width; ++i) {
3967 for (j = 0; j < num_siblings; ++j)
3968 siblings[j] = engines[i * num_siblings + j];
3970 ce = intel_engine_create_virtual(siblings, num_siblings,
3979 parent->ops = &virtual_parent_context_ops;
3981 ce->ops = &virtual_child_context_ops;
3982 intel_context_bind_parent_child(parent, ce);
3986 parent->parallel.fence_context = dma_fence_context_alloc(1);
3988 parent->engine->emit_bb_start =
3989 emit_bb_start_parent_no_preempt_mid_batch;
3990 parent->engine->emit_fini_breadcrumb =
3991 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
/* The parent's breadcrumb length grows with the number of children. */
3992 parent->engine->emit_fini_breadcrumb_dw =
3993 12 + 4 * parent->parallel.number_children;
3994 for_each_child(parent, ce) {
3995 ce->engine->emit_bb_start =
3996 emit_bb_start_child_no_preempt_mid_batch;
3997 ce->engine->emit_fini_breadcrumb =
3998 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3999 ce->engine->emit_fini_breadcrumb_dw = 16;
4007 intel_context_put(parent);
/*
 * Breadcrumbs irq_enable hook (installed in guc_init_breadcrumbs()): enable
 * the interrupt on every engine in b->engine_mask. `result` ORs together
 * the return values of the individual enable calls.
 */
4013 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
4015 struct intel_engine_cs *sibling;
4016 intel_engine_mask_t tmp, mask = b->engine_mask;
4017 bool result = false;
4019 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4020 result |= intel_engine_irq_enable(sibling);
/*
 * Breadcrumbs irq_disable hook (installed in guc_init_breadcrumbs()):
 * disable the interrupt on every engine in b->engine_mask.
 */
4026 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
4028 struct intel_engine_cs *sibling;
4029 intel_engine_mask_t tmp, mask = b->engine_mask;
4031 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4032 intel_engine_irq_disable(sibling);
/*
 * Make this engine share a breadcrumbs object with a sibling of the same
 * engine class: if the engine's own object differs from the sibling's, the
 * own reference is dropped and a reference on the sibling's object is taken
 * instead. Afterwards the engine is added to the shared object's
 * engine_mask and the GuC irq enable/disable hooks are installed.
 *
 * NOTE(review): the loop's skip/termination statements are not visible in
 * this extract; per the comment below the target is the first instance in
 * the class.
 */
4035 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
4040 * In GuC submission mode we do not know which physical engine a request
4041 * will be scheduled on, this creates a problem because the breadcrumb
4042 * interrupt is per physical engine. To work around this we attach
4043 * requests and direct all breadcrumb interrupts to the first instance
4044 * of an engine per class. In addition all breadcrumb interrupts are
4045 * enabled / disabled across an engine class in unison.
4047 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
4048 struct intel_engine_cs *sibling =
4049 engine->gt->engine_class[engine->class][i];
4052 if (engine->breadcrumbs != sibling->breadcrumbs) {
4053 intel_breadcrumbs_put(engine->breadcrumbs);
4054 engine->breadcrumbs =
4055 intel_breadcrumbs_get(sibling->breadcrumbs);
4061 if (engine->breadcrumbs) {
4062 engine->breadcrumbs->engine_mask |= engine->mask;
4063 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
4064 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
/*
 * sched_engine->bump_inflight_request_prio hook (installed in
 * intel_guc_submission_setup()): raise the GuC priority level recorded for a
 * request and re-derive its context's priority.
 *
 * The first test is an unlocked early-out: nothing to do for priorities
 * below NORMAL, for finalised requests, or when the new level would not be
 * higher than the one already recorded. GUC_PRIO_FINI is checked again once
 * ce->guc_state.lock is held.
 */
4068 static void guc_bump_inflight_request_prio(struct i915_request *rq,
4071 struct intel_context *ce = request_to_scheduling_context(rq);
4072 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
4074 /* Short circuit function */
4075 if (prio < I915_PRIORITY_NORMAL ||
4076 rq->guc_prio == GUC_PRIO_FINI ||
4077 (rq->guc_prio != GUC_PRIO_INIT &&
4078 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
4081 spin_lock(&ce->guc_state.lock);
4082 if (rq->guc_prio != GUC_PRIO_FINI) {
/* Only a request that was already counted has a contribution to remove. */
4083 if (rq->guc_prio != GUC_PRIO_INIT)
4084 sub_context_inflight_prio(ce, rq->guc_prio);
4085 rq->guc_prio = new_guc_prio;
4086 add_context_inflight_prio(ce, rq->guc_prio);
4087 update_context_prio(ce);
4089 spin_unlock(&ce->guc_state.lock);
/*
 * sched_engine->retire_inflight_request_prio hook (installed in
 * intel_guc_submission_setup()): finalise the request's priority accounting
 * under its scheduling context's guc_state.lock.
 */
4092 static void guc_retire_inflight_request_prio(struct i915_request *rq)
4094 struct intel_context *ce = request_to_scheduling_context(rq);
4096 spin_lock(&ce->guc_state.lock);
4097 guc_prio_fini(rq, ce);
4098 spin_unlock(&ce->guc_state.lock);
/* Reset the seqno of every timeline linked on the engine's status page. */
4101 static void sanitize_hwsp(struct intel_engine_cs *engine)
4103 struct intel_timeline *tl;
4105 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
4106 intel_timeline_reset_seqno(tl);
/*
 * engine->sanitize hook (installed in intel_guc_submission_setup()): with
 * CONFIG_DRM_I915_DEBUG_GEM, poison the status page; then reset the timeline
 * seqnos stored there, flush the page from the CPU caches and reset the
 * engine's pinned contexts.
 */
4109 static void guc_sanitize(struct intel_engine_cs *engine)
4112 * Poison residual state on resume, in case the suspend didn't!
4114 * We have to assume that across suspend/resume (or other loss
4115 * of control) that the contents of our pinned buffers has been
4116 * lost, replaced by garbage. Since this doesn't always happen,
4117 * let's poison such state so that we more quickly spot when
4118 * we falsely assume it has been preserved.
4120 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4121 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4124 * The kernel_context HWSP is stored in the status_page. As above,
4125 * that may be lost on resume/initialisation, and so we need to
4126 * reset the value in the HWSP.
4128 sanitize_hwsp(engine);
4130 /* And scrub the dirty cachelines for the HWSP */
4131 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4133 intel_engine_reset_pinned_contexts(engine);
/*
 * Program the hardware status page: open the HWSP write mask fully and
 * write the GGTT offset of the engine's status page VMA to an engine
 * register (the register name is not visible in this extract).
 */
4136 static void setup_hwsp(struct intel_engine_cs *engine)
4138 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4140 ENGINE_WRITE_FW(engine,
4142 i915_ggtt_offset(engine->status_page.vma));
/*
 * Start the engine's command streamer: set the
 * GEN11_GFX_DISABLE_LEGACY_MODE masked bit (target register not visible in
 * this extract), clear STOP_RING in RING_MI_MODE and read the register back
 * to post the write.
 */
4145 static void start_engine(struct intel_engine_cs *engine)
4147 ENGINE_WRITE_FW(engine,
4149 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4151 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4152 ENGINE_POSTING_READ(engine, RING_MI_MODE);
/*
 * engine->resume hook (installed in guc_default_vfuncs()): requires all
 * forcewake domains to be held. Re-initialises the engine's MOCS settings,
 * resets its breadcrumbs, restarts the command streamer and, on the engine
 * flagged I915_ENGINE_FIRST_RENDER_COMPUTE, enables the CCS engines.
 */
4155 static int guc_resume(struct intel_engine_cs *engine)
4157 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4159 intel_mocs_init_engine(engine);
4161 intel_breadcrumbs_reset(engine->breadcrumbs);
4164 start_engine(engine);
4166 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4167 xehp_enable_ccs_engines(engine);
/*
 * sched_engine->disabled hook: the scheduling engine counts as disabled
 * when its tasklet has no callback set.
 */
4172 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4174 return !sched_engine->tasklet.callback;
/* engine->set_default_submission hook: route request submission through guc_submit_request(). */
4177 static void guc_set_default_submission(struct intel_engine_cs *engine)
4179 engine->submit_request = guc_submit_request;
/*
 * Bring an already-pinned kernel context under GuC control (called for each
 * entry of engine->pinned_contexts_list from guc_init_submission()): pin a
 * guc_id if the context has none, run guc_context_init() if it has not been
 * initialised yet, then attempt registration. The unpin_guc_id() at the end
 * belongs to an unwind path whose condition is not visible in this extract.
 */
4182 static inline int guc_kernel_context_pin(struct intel_guc *guc,
4183 struct intel_context *ce)
4188 * Note: we purposefully do not check the returns below because
4189 * the registration can only fail if a reset is just starting.
4190 * This is called at the end of reset so presumably another reset
4191 * isn't happening and even it did this code would be run again.
4194 if (context_guc_id_invalid(ce)) {
4195 ret = pin_guc_id(guc, ce);
4201 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
4202 guc_context_init(ce);
4204 ret = try_context_registration(ce, true);
4206 unpin_guc_id(guc, ce);
/*
 * (Re)initialise the driver-side submission state, called from
 * intel_guc_submission_enable(): empty the context lookup table, forget any
 * stalled request, and hand every context on each engine's
 * pinned_contexts_list to guc_kernel_context_pin().
 */
4211 static inline int guc_init_submission(struct intel_guc *guc)
4213 struct intel_gt *gt = guc_to_gt(guc);
4214 struct intel_engine_cs *engine;
4215 enum intel_engine_id id;
4217 /* make sure all descriptors are clean... */
4218 xa_destroy(&guc->context_lookup);
4221 * A reset might have occurred while we had a pending stalled request,
4222 * so make sure we clean that up.
4224 guc->stalled_request = NULL;
4225 guc->submission_stall_reason = STALL_NONE;
4228 * Some contexts might have been pinned before we enabled GuC
4229 * submission, so we need to add them to the GuC bookeeping.
4230 * Also, after a reset the of the GuC we want to make sure that the
4231 * information shared with GuC is properly reset. The kernel LRCs are
4232 * not attached to the gem_context, so they need to be added separately.
4234 for_each_engine(engine, gt, id) {
4235 struct intel_context *ce;
4237 list_for_each_entry(ce, &engine->pinned_contexts_list,
4238 pinned_contexts_link) {
4239 int ret = guc_kernel_context_pin(guc, ce);
4242 /* No point in trying to clean up as i915 will wedge on failure */
/*
 * engine->release hook (installed in intel_guc_submission_setup()): clear
 * the sanitize hook, run the common engine cleanup and tear down the
 * workaround context set up by lrc_init_wa_ctx().
 */
4251 static void guc_release(struct intel_engine_cs *engine)
4253 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4255 intel_engine_cleanup_common(engine);
4256 lrc_fini_wa_ctx(engine);
/*
 * Visit every engine in the given engine's mask.
 * NOTE(review): the loop body is not visible in this extract; going by the
 * function name it presumably bumps each engine's serial -- confirm.
 */
4259 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4261 struct intel_engine_cs *e;
4262 intel_engine_mask_t tmp, mask = engine->mask;
4264 for_each_engine_masked(e, engine->gt, mask, tmp)
/*
 * Install the GuC-submission defaults on an engine: resume hook, context
 * ops, request alloc/add/remove hooks, scheduler entry point, reset hooks,
 * flush/breadcrumb/bb_start emitters (gen8 variants first, replaced by the
 * gen12 / xehp variants on newer graphics versions), busyness reporting and
 * the capability flags. The render-specific emitters are applied separately
 * by rcs_submission_override().
 */
4268 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4270 /* Default vfuncs which can be overridden by each engine. */
4272 engine->resume = guc_resume;
4274 engine->cops = &guc_context_ops;
4275 engine->request_alloc = guc_request_alloc;
4276 engine->add_active_request = add_to_context;
4277 engine->remove_active_request = remove_from_context;
4279 engine->sched_engine->schedule = i915_schedule;
4281 engine->reset.prepare = guc_engine_reset_prepare;
4282 engine->reset.rewind = guc_rewind_nop;
4283 engine->reset.cancel = guc_reset_nop;
4284 engine->reset.finish = guc_reset_nop;
4286 engine->emit_flush = gen8_emit_flush_xcs;
4287 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4288 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4289 if (GRAPHICS_VER(engine->i915) >= 12) {
4290 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4291 engine->emit_flush = gen12_emit_flush_xcs;
4293 engine->set_default_submission = guc_set_default_submission;
4294 engine->busyness = guc_engine_busyness;
4296 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4297 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4298 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4300 /* Wa_14014475959:dg2 */
4301 if (engine->class == COMPUTE_CLASS)
4302 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
4303 IS_DG2(engine->i915))
4304 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4307 * TODO: GuC supports timeslicing and semaphores as well, but they're
4308 * handled by the firmware so some minor tweaks are required before
4311 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4314 engine->emit_bb_start = gen8_emit_bb_start;
4315 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4316 engine->emit_bb_start = xehp_emit_bb_start;
/*
 * Replace the flush and fini-breadcrumb emitters with the render (rcs)
 * variants, selected by graphics version: gen12, gen11 or gen8 flavours.
 * Called from intel_guc_submission_setup() for engines flagged
 * I915_ENGINE_HAS_RCS_REG_STATE. The case labels are not visible in this
 * extract.
 */
4319 static void rcs_submission_override(struct intel_engine_cs *engine)
4321 switch (GRAPHICS_VER(engine->i915)) {
4323 engine->emit_flush = gen12_emit_flush_rcs;
4324 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4327 engine->emit_flush = gen11_emit_flush_rcs;
4328 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4331 engine->emit_flush = gen8_emit_flush_rcs;
4332 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
/*
 * Default interrupt setup for an engine under GuC submission: the keep mask
 * is set to the render user interrupt and cs_irq_handler is installed as
 * the engine's irq handler.
 */
4337 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4339 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4340 intel_engine_set_irq_handler(engine, cs_irq_handler);
/*
 * sched_engine->destroy hook, invoked as the kref release callback: clears
 * the GuC's pointer to the scheduling engine, kills its tasklet and frees
 * the object. The owning GuC is recovered from private_data.
 */
4343 static void guc_sched_engine_destroy(struct kref *kref)
4345 struct i915_sched_engine *sched_engine =
4346 container_of(kref, typeof(*sched_engine), ref);
4347 struct intel_guc *guc = sched_engine->private_data;
4349 guc->sched_engine = NULL;
4350 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4351 kfree(sched_engine);
/*
 * Switch an engine over to GuC submission. On first use this creates the
 * single scheduling engine owned by the GuC and wires up its hooks and
 * tasklet. The engine's own sched_engine reference is then dropped and
 * replaced by a reference on the GuC's one. Finally the default vfuncs,
 * irqs and breadcrumbs are set up, render-specific overrides are applied
 * where flagged, the workaround context is initialised and the
 * sanitize/release hooks are installed.
 *
 * NOTE(review): the return statements (including the one taken when
 * i915_sched_engine_create() fails) are not visible in this extract.
 */
4354 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4356 struct drm_i915_private *i915 = engine->i915;
4357 struct intel_guc *guc = &engine->gt->uc.guc;
4360 * The setup relies on several assumptions (e.g. irqs always enabled)
4361 * that are only valid on gen11+
4363 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4365 if (!guc->sched_engine) {
4366 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4367 if (!guc->sched_engine)
4370 guc->sched_engine->schedule = i915_schedule;
4371 guc->sched_engine->disabled = guc_sched_engine_disabled;
4372 guc->sched_engine->private_data = guc;
4373 guc->sched_engine->destroy = guc_sched_engine_destroy;
4374 guc->sched_engine->bump_inflight_request_prio =
4375 guc_bump_inflight_request_prio;
4376 guc->sched_engine->retire_inflight_request_prio =
4377 guc_retire_inflight_request_prio;
4378 tasklet_setup(&guc->sched_engine->tasklet,
4379 guc_submission_tasklet);
4381 i915_sched_engine_put(engine->sched_engine);
4382 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4384 guc_default_vfuncs(engine);
4385 guc_default_irqs(engine);
4386 guc_init_breadcrumbs(engine);
4388 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4389 rcs_submission_override(engine);
4391 lrc_init_wa_ctx(engine);
4393 /* Finally, take ownership and responsibility for cleanup! */
4394 engine->sanitize = guc_sanitize;
4395 engine->release = guc_release;
/*
 * Scratch object used to build an "update scheduling policies" H2G message
 * as a list of KLVs: max_words is the capacity of h2g.data in u32 words,
 * num_words the number of words filled in so far. Other members are not
 * visible in this extract.
 */
4400 struct scheduling_policy {
4402 u32 max_words, num_words;
4405 struct guc_update_scheduling_policy h2g;
/*
 * Size of the H2G action built so far, in u32 words: the distance from the
 * start of the h2g message to the end of the KLV words already written
 * (both pointers are u32 *, so the difference counts words).
 */
4408 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
4410 u32 *start = (void *)&policy->h2g;
4411 u32 *end = policy->h2g.data + policy->num_words;
4412 size_t delta = end - start;
/*
 * Begin a new KLV policy message: set the H2G action code, record the
 * capacity of the data array and reset the used-word count.
 */
4417 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
4419 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
4420 policy->max_words = ARRAY_SIZE(policy->h2g.data);
4421 policy->num_words = 0;
/*
 * Append one KLV to the policy message: a header word carrying the key
 * (@action) and the value length in words (@len), followed by @len u32
 * words copied from @data. Asserts the entry fits in the remaining space.
 */
4427 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
4428 u32 action, u32 *data, u32 len)
4430 u32 *klv_ptr = policy->h2g.data + policy->num_words;
/* One header word plus len payload words must fit. */
4432 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
4433 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
4434 FIELD_PREP(GUC_KLV_0_LEN, len);
4435 memcpy(klv_ptr, data, sizeof(u32) * len);
4436 policy->num_words += 1 + len;
/*
 * Send the assembled policy message to the GuC and compare the value
 * returned by intel_guc_send() with policy->count; a mismatch is reported
 * as "processed %d of %d KLVs".
 *
 * NOTE(review): the error checks and return statements are not visible in
 * this extract.
 */
4440 static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
4441 struct scheduling_policy *policy)
4445 ret = intel_guc_send(guc, (u32 *)&policy->h2g,
4446 __guc_scheduling_policy_action_size(policy));
4448 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
4453 if (ret != policy->count) {
4454 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
4455 ret, policy->count);
4456 if (ret > policy->count)
/*
 * Configure the GuC's global scheduling policies, called from
 * intel_guc_submission_enable(). Builds a KLV message containing the
 * render/compute yield policy (duration and ratio) and sends it while
 * holding a runtime-pm wakeref. The GUC_SUBMIT_VER() test gates this on
 * submission interface version 1.1.0.
 *
 * NOTE(review): the declarations of `ret` and `yield` and the return
 * statements are not visible in this extract.
 */
4463 static int guc_init_global_schedule_policy(struct intel_guc *guc)
4465 struct scheduling_policy policy;
4466 struct intel_gt *gt = guc_to_gt(guc);
4467 intel_wakeref_t wakeref;
4470 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
4473 __guc_scheduling_policy_start_klv(&policy);
/* Address of the runtime-pm object reached through the local `gt`. */
4475 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
4477 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
4478 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
4481 __guc_scheduling_policy_add_klv(&policy,
4482 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
4483 yield, ARRAY_SIZE(yield));
4485 ret = __guc_action_set_scheduling_policies(guc, &policy);
/*
 * Program GEN12_GUC_SEM_INTR_ENABLES. Gated on the graphics version (the
 * `< 12` test); one visible value routes the semaphore interrupts to the
 * GuC with all of them enabled.
 *
 * NOTE(review): the branch on @to_guc and the alternative value are not
 * visible in this extract.
 */
4491 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
4493 struct intel_gt *gt = guc_to_gt(guc);
4496 if (GRAPHICS_VER(gt->i915) < 12)
4500 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL;
4504 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val);
/*
 * Enable GuC submission: route semaphore interrupts to the GuC, initialise
 * the submission state, the engine busyness stats and the global scheduling
 * policy. The trailing guc_fini_engine_stats() / guc_route_semaphores(false)
 * calls are the unwind steps; the labels and checks that reach them are not
 * visible in this extract.
 */
4507 int intel_guc_submission_enable(struct intel_guc *guc)
4511 /* Semaphore interrupt enable and route to GuC */
4512 guc_route_semaphores(guc, true);
4514 ret = guc_init_submission(guc);
4518 ret = guc_init_engine_stats(guc);
4522 ret = guc_init_global_schedule_policy(guc);
4529 guc_fini_engine_stats(guc);
4531 guc_route_semaphores(guc, false);
/*
 * Disable GuC submission: cancel the busyness worker and route semaphore
 * interrupts away from the GuC again.
 */
4535 /* Note: By the time we're here, GuC may have already been reset */
4536 void intel_guc_submission_disable(struct intel_guc *guc)
4538 guc_cancel_busyness_worker(guc);
4540 /* Semaphore interrupt disable and route to host */
4541 guc_route_semaphores(guc, false);
/* GuC submission is supported when the GuC itself is supported and the graphics version is at least 11. */
4544 static bool __guc_submission_supported(struct intel_guc *guc)
4546 /* GuC submission is unavailable for pre-Gen11 */
4547 return intel_guc_is_supported(guc) &&
4548 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
/*
 * GuC submission is selected when the ENABLE_GUC_SUBMISSION bit is set in
 * the enable_guc module parameter; support is checked first (the statement
 * taken when unsupported is not visible in this extract).
 */
4551 static bool __guc_submission_selected(struct intel_guc *guc)
4553 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4555 if (!intel_guc_submission_is_supported(guc))
4558 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
/*
 * Upper bound for the sched-disable guc_id threshold: the total number of
 * guc_ids minus NUMBER_MULTI_LRC_GUC_ID(guc).
 */
4561 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
4563 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
4567 * This default value of 33 milliseconds (+1 millisecond round up) ensures 30fps or higher
4568 * workloads are able to enjoy the latency reduction when delaying the schedule-disable
4569 * operation. This matches the 30fps game-render + encode (real world) workload this
4570 * knob was tested against.
4572 #define SCHED_DISABLE_DELAY_MS 34
/*
 * A threshold of 75% is a reasonable starting point considering that real world apps
 * generally don't get anywhere near this.
 *
 * Expands to three quarters of intel_guc_sched_disable_gucid_threshold_max()
 * for the GuC passed as @__guc. The argument is used explicitly so the macro
 * does not depend on the caller's variable being named "guc".
 */
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
	(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
/*
 * Early, software-only initialisation of the submission state: the context
 * lookup xarray, the submission_state lock, lists, guc_id allocator and
 * workers, the timestamp lock and ping work, and the tunables
 * (sched-disable delay, number of guc_ids, guc_id threshold). Also caches
 * whether GuC submission is supported and selected.
 */
4581 void intel_guc_submission_init_early(struct intel_guc *guc)
4583 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4585 spin_lock_init(&guc->submission_state.lock);
4586 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4587 ida_init(&guc->submission_state.guc_ids);
4588 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4589 INIT_WORK(&guc->submission_state.destroyed_worker,
4590 destroyed_worker_func);
4591 INIT_WORK(&guc->submission_state.reset_fail_worker,
4592 reset_fail_worker_func);
4594 spin_lock_init(&guc->timestamp.lock);
4595 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4597 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
/* num_guc_ids must be set before the threshold, which is derived from it. */
4598 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4599 guc->submission_state.sched_disable_gucid_threshold =
4600 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
4601 guc->submission_supported = __guc_submission_supported(guc);
4602 guc->submission_selected = __guc_submission_selected(guc);
/*
 * Look up the context for a ctx_id carried by a G2H message.
 * An error is logged when ctx_id is >= GUC_MAX_CONTEXT_ID, when
 * __get_context() finds nothing, or when the context found is a child.
 * NOTE(review): the return statements are not visible in this extract.
 */
4605 static inline struct intel_context *
4606 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4608 struct intel_context *ce;
4610 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4611 guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
4615 ce = __get_context(guc, ctx_id);
4616 if (unlikely(!ce)) {
4617 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
4621 if (unlikely(intel_context_is_child(ce))) {
4622 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
/*
 * G2H handler for a deregister-done notification.
 * Rejects messages shorter than one dword, looks up the context, then either
 * registers the context that was waiting for this deregister (under a
 * runtime-PM wakeref) or, for a destroyed context, releases its guc_id and
 * destroys it. decr_outstanding_submission_g2h() is called after that.
 * NOTE(review): the rest of the parameter list, the ctx_id extraction, the
 * return statements and the closing braces are missing from this extract.
 */
4629 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4633 struct intel_context *ce;
4636 if (unlikely(len < 1)) {
4637 guc_err(guc, "Invalid length %u\n", len);
4642 ce = g2h_context_lookup(guc, ctx_id);
4646 trace_intel_context_deregister_done(ce);
/* Selftest hook: the drop_deregister flag is consumed (cleared) here */
4648 #ifdef CONFIG_DRM_I915_SELFTEST
4649 if (unlikely(ce->drop_deregister)) {
4650 ce->drop_deregister = false;
4655 if (context_wait_for_deregister_to_register(ce)) {
4656 struct intel_runtime_pm *runtime_pm =
4657 &ce->engine->gt->i915->runtime_pm;
4658 intel_wakeref_t wakeref;
4661 * Previous owner of this guc_id has been deregistered, now safe
4662 * register this context.
4664 with_intel_runtime_pm(runtime_pm, wakeref)
4665 register_context(ce, true);
4666 guc_signal_context_fence(ce);
4667 intel_context_put(ce);
4668 } else if (context_destroyed(ce)) {
4669 /* Context has been destroyed */
4670 intel_gt_pm_put_async(guc_to_gt(guc));
4671 release_guc_id(guc, ce);
4672 __guc_context_destroy(ce);
4675 decr_outstanding_submission_g2h(guc);
/*
 * G2H handler for a schedule enable/disable done notification.
 * Rejects messages shorter than two dwords and contexts that are destroyed
 * or have neither a pending enable nor a pending disable. A pending enable
 * is simply cleared under guc_state.lock. A pending disable unpins the
 * context, clears the banned/pending-disable state, signals the context
 * fence and completes the blocked fence under the same lock.
 * NOTE(review): the rest of the parameter list, the ctx_id extraction, the
 * "banned" declaration, the condition guarding the cancel/signal calls and
 * the return statements are missing from this extract.
 */
4680 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4684 struct intel_context *ce;
4685 unsigned long flags;
4688 if (unlikely(len < 2)) {
4689 guc_err(guc, "Invalid length %u\n", len);
4694 ce = g2h_context_lookup(guc, ctx_id);
4698 if (unlikely(context_destroyed(ce) ||
4699 (!context_pending_enable(ce) &&
4700 !context_pending_disable(ce)))) {
4701 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
4702 ce->guc_state.sched_state, ctx_id);
4706 trace_intel_context_sched_done(ce);
4708 if (context_pending_enable(ce)) {
/* Selftest hook: the drop_schedule_enable flag is consumed here */
4709 #ifdef CONFIG_DRM_I915_SELFTEST
4710 if (unlikely(ce->drop_schedule_enable)) {
4711 ce->drop_schedule_enable = false;
4716 spin_lock_irqsave(&ce->guc_state.lock, flags);
4717 clr_context_pending_enable(ce);
4718 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4719 } else if (context_pending_disable(ce)) {
/* Selftest hook: the drop_schedule_disable flag is consumed here */
4722 #ifdef CONFIG_DRM_I915_SELFTEST
4723 if (unlikely(ce->drop_schedule_disable)) {
4724 ce->drop_schedule_disable = false;
4730 * Unpin must be done before __guc_signal_context_fence,
4731 * otherwise a race exists between the requests getting
4732 * submitted + retired before this unpin completes resulting in
4733 * the pin_count going to zero and the context still being
4736 intel_context_sched_disable_unpin(ce);
/* Sample the banned state before clearing it; all updates are made under guc_state.lock */
4738 spin_lock_irqsave(&ce->guc_state.lock, flags);
4739 banned = context_banned(ce);
4740 clr_context_banned(ce);
4741 clr_context_pending_disable(ce);
4742 __guc_signal_context_fence(ce);
4743 guc_blocked_fence_complete(ce);
4744 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4747 guc_cancel_context_requests(ce);
4748 intel_engine_signal_breadcrumbs(ce->engine);
4752 decr_outstanding_submission_g2h(guc);
4753 intel_context_put(ce);
/*
 * Mark the hung context on the engine(s) it maps to, bump the per-class
 * reset counters and capture the GPU error state with
 * CORE_DUMP_FLAG_IS_GUC_CAPTURE while holding a runtime-PM wakeref.
 * For a virtual engine, every physical engine in ce->engine->mask is tested
 * with intel_guc_capture_is_matching_engine(). The warning text indicates it
 * is emitted when no physical engine matched.
 * NOTE(review): the engine_mask initialisation for the virtual case, the
 * tests on "match"/engine_mask and the else line are missing from this
 * extract.
 */
4758 static void capture_error_state(struct intel_guc *guc,
4759 struct intel_context *ce)
4761 struct intel_gt *gt = guc_to_gt(guc);
4762 struct drm_i915_private *i915 = gt->i915;
4763 intel_wakeref_t wakeref;
4764 intel_engine_mask_t engine_mask;
4766 if (intel_engine_is_virtual(ce->engine)) {
4767 struct intel_engine_cs *e;
4768 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask;
4771 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) {
4772 bool match = intel_guc_capture_is_matching_engine(gt, ce, e);
4775 intel_engine_set_hung_context(e, ce);
4776 engine_mask |= e->mask;
4777 atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]);
4782 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s",
4783 ce->guc_id.id, ce->engine->name);
/* Non-virtual case: the context's own engine is the hung engine */
4787 intel_engine_set_hung_context(ce->engine, ce);
4788 engine_mask = ce->engine->mask;
4789 atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]);
4792 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4793 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4796 static void guc_context_replay(struct intel_context *ce)
4798 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4800 __guc_reset_context(ce, ce->engine->mask);
4801 tasklet_hi_schedule(&sched_engine->tasklet);
/*
 * Handle a context reset notification: trace and log it, then, for a
 * schedulable context, capture the error state and replay the context.
 * The guc_info() message covers the ignored (exiting context) case.
 * NOTE(review): the "else" line and the closing braces are missing from this
 * extract.
 */
4804 static void guc_handle_context_reset(struct intel_guc *guc,
4805 struct intel_context *ce)
4807 trace_intel_context_reset(ce);
4809 guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
4810 ce->guc_id.id, ce->engine->name,
4811 str_yes_no(intel_context_is_exiting(ce)),
4812 str_yes_no(intel_context_is_banned(ce)));
4814 if (likely(intel_context_is_schedulable(ce))) {
4815 capture_error_state(guc, ce);
4816 guc_context_replay(ce);
4818 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
4819 ce->guc_id.id, ce->engine->name);
/*
 * G2H handler for a context reset notification.
 * Expects exactly one payload dword. The context is looked up and a
 * reference is taken while the context_lookup xarray lock is held; the reset
 * is then handled and the reference dropped.
 * NOTE(review): the ctx_id extraction, the check of the lookup result and
 * the return statements are missing from this extract.
 */
4823 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4824 const u32 *msg, u32 len)
4826 struct intel_context *ce;
4827 unsigned long flags;
4830 if (unlikely(len != 1)) {
4831 guc_err(guc, "Invalid length %u", len);
4838 * The context lookup uses the xarray but lookups only require an RCU lock
4839 * not the full spinlock. So take the lock explicitly and keep it until the
4840 * context has been reference count locked to ensure it can't be destroyed
4841 * asynchronously until the reset is done.
4843 xa_lock_irqsave(&guc->context_lookup, flags);
4844 ce = g2h_context_lookup(guc, ctx_id);
4846 intel_context_get(ce);
4847 xa_unlock_irqrestore(&guc->context_lookup, flags);
4852 guc_handle_context_reset(guc, ce);
4853 intel_context_put(ce);
/*
 * G2H handler for a state-capture event.
 * Expects exactly one payload dword, warns when its status field reports
 * INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE, and calls
 * intel_guc_capture_process().
 * NOTE(review): the "status" declaration and the return statements are
 * missing from this extract.
 */
4858 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4859 const u32 *msg, u32 len)
4863 if (unlikely(len != 1)) {
4864 guc_dbg(guc, "Invalid length %u", len);
4868 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4869 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4870 guc_warn(guc, "No space for error capture");
4872 intel_guc_capture_process(guc);
4877 struct intel_engine_cs *
4878 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4880 struct intel_gt *gt = guc_to_gt(guc);
4881 u8 engine_class = guc_class_to_engine_class(guc_class);
4883 /* Class index is checked in class converter */
4884 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4886 return gt->engine_class[engine_class][instance];
/*
 * Worker that reacts to failed engine resets reported by the GuC.
 * The accumulated reset_fail_mask is taken and cleared under
 * submission_state.lock. If it is non-zero, the hung context is searched for
 * on every engine in the mask and intel_gt_handle_error() is called.
 * NOTE(review): some arguments of intel_gt_handle_error() are missing from
 * this extract.
 */
4889 static void reset_fail_worker_func(struct work_struct *w)
4891 struct intel_guc *guc = container_of(w, struct intel_guc,
4892 submission_state.reset_fail_worker);
4893 struct intel_gt *gt = guc_to_gt(guc);
4894 intel_engine_mask_t reset_fail_mask;
4895 unsigned long flags;
4897 spin_lock_irqsave(&guc->submission_state.lock, flags);
4898 reset_fail_mask = guc->submission_state.reset_fail_mask;
4899 guc->submission_state.reset_fail_mask = 0;
4900 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4902 if (likely(reset_fail_mask)) {
4903 struct intel_engine_cs *engine;
4904 enum intel_engine_id id;
4907 * GuC is toast at this point - it dead loops after sending the failed
4908 * reset notification. So need to manually determine the guilty context.
4909 * Note that it should be reliable to do this here because the GuC is
4910 * toast and will not be scheduling behind the KMD's back.
4912 for_each_engine_masked(engine, gt, reset_fail_mask, id)
4913 intel_guc_find_hung_context(engine);
4915 intel_gt_handle_error(gt, reset_fail_mask,
4917 "GuC failed to reset engine mask=0x%x",
/*
 * G2H handler for an engine reset failure notification.
 * Expects exactly three payload dwords, resolves the engine with
 * intel_guc_lookup_engine(), logs the failure, records the engine in
 * submission_state.reset_fail_mask under submission_state.lock and queues
 * reset_fail_worker on system_unbound_wq.
 * NOTE(review): the "reason" declaration, the decoding of guc_class /
 * instance / reason from the message and the return statements are missing
 * from this extract.
 */
4922 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4923 const u32 *msg, u32 len)
4925 struct intel_engine_cs *engine;
4926 u8 guc_class, instance;
4928 unsigned long flags;
4930 if (unlikely(len != 3)) {
4931 guc_err(guc, "Invalid length %u", len);
4939 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4940 if (unlikely(!engine)) {
4941 guc_err(guc, "Invalid engine %d:%d", guc_class, instance);
4946 * This is an unexpected failure of a hardware feature. So, log a real
4947 * error message not just the informational that comes with the reset.
4949 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X",
4950 guc_class, instance, engine->name, reason);
4952 spin_lock_irqsave(&guc->submission_state.lock, flags);
4953 guc->submission_state.reset_fail_mask |= engine->mask;
4954 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4957 * A GT reset flushes this worker queue (G2H handler) so we must use
4958 * another worker to trigger a GT reset.
4960 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
/*
 * Walk every context in the GuC context_lookup xarray and flag, via
 * intel_engine_set_hung_context(), a context for @engine.
 * Contexts are filtered on being pinned and on matching @engine (mask
 * overlap for virtual engines, pointer equality otherwise). Their request
 * lists are scanned under guc_state.lock using the I915_REQUEST_ACTIVE test.
 * The xarray lock is dropped while each context is inspected and re-taken
 * before moving on; the context is kept alive by a reference taken with
 * kref_get_unless_zero().
 * NOTE(review): the continue/goto/break statements, the label and the
 * closing braces are missing from this extract, so the exact control flow
 * between the visible statements cannot be confirmed here.
 */
4965 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4967 struct intel_guc *guc = &engine->gt->uc.guc;
4968 struct intel_context *ce;
4969 struct i915_request *rq;
4970 unsigned long index;
4971 unsigned long flags;
4973 /* Reset called during driver load? GuC not yet initialised! */
4974 if (unlikely(!guc_submission_initialized(guc)))
4977 xa_lock_irqsave(&guc->context_lookup, flags);
4978 xa_for_each(&guc->context_lookup, index, ce) {
/* Take a reference, then release the xarray lock while the context is examined */
4981 if (!kref_get_unless_zero(&ce->ref))
4984 xa_unlock(&guc->context_lookup);
4986 if (!intel_context_is_pinned(ce))
4989 if (intel_engine_is_virtual(ce->engine)) {
4990 if (!(ce->engine->mask & engine->mask))
4993 if (ce->engine != engine)
4998 spin_lock(&ce->guc_state.lock);
4999 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
5000 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
5006 spin_unlock(&ce->guc_state.lock);
5009 intel_engine_set_hung_context(engine, ce);
5011 /* Can only cope with one hang at a time... */
5012 intel_context_put(ce);
5013 xa_lock(&guc->context_lookup);
/* Drop the reference and re-take the xarray lock before continuing the walk */
5018 intel_context_put(ce);
5019 xa_lock(&guc->context_lookup);
5022 xa_unlock_irqrestore(&guc->context_lookup, flags);
/*
 * Dump, through intel_engine_dump_active_requests(), the request lists of
 * the contexts in the GuC context_lookup xarray that are pinned and match
 * @engine (mask overlap for virtual engines, pointer equality otherwise).
 * The xarray lock is dropped while each context is inspected (a reference
 * taken with kref_get_unless_zero() keeps it alive) and re-taken afterwards.
 * NOTE(review): the continue/goto statements, the label, the remaining
 * arguments of the dump call and the closing braces are missing from this
 * extract.
 */
5025 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
5026 struct i915_request *hung_rq,
5027 struct drm_printer *m)
5029 struct intel_guc *guc = &engine->gt->uc.guc;
5030 struct intel_context *ce;
5031 unsigned long index;
5032 unsigned long flags;
5034 /* Reset called during driver load? GuC not yet initialised! */
5035 if (unlikely(!guc_submission_initialized(guc)))
5038 xa_lock_irqsave(&guc->context_lookup, flags);
5039 xa_for_each(&guc->context_lookup, index, ce) {
5040 if (!kref_get_unless_zero(&ce->ref))
5043 xa_unlock(&guc->context_lookup);
5045 if (!intel_context_is_pinned(ce))
5048 if (intel_engine_is_virtual(ce->engine)) {
5049 if (!(ce->engine->mask & engine->mask))
5052 if (ce->engine != engine)
/* The request list is walked under guc_state.lock */
5056 spin_lock(&ce->guc_state.lock);
5057 intel_engine_dump_active_requests(&ce->guc_state.requests,
5059 spin_unlock(&ce->guc_state.lock);
5062 intel_context_put(ce);
5063 xa_lock(&guc->context_lookup);
5065 xa_unlock_irqrestore(&guc->context_lookup, flags);
/*
 * Print GuC submission state to @p: the submission API version, the number
 * of outstanding submission G2H, the tasklet count and, under
 * sched_engine->lock, the guc_id and seqno of each request queued in the
 * scheduler's priority tree.
 * NOTE(review): the "rb" declaration, any early-return check and the last
 * drm_printf() argument are missing from this extract.
 */
5068 void intel_guc_submission_print_info(struct intel_guc *guc,
5069 struct drm_printer *p)
5071 struct i915_sched_engine *sched_engine = guc->sched_engine;
5073 unsigned long flags;
5078 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
5079 guc->submission_version.major, guc->submission_version.minor,
5080 guc->submission_version.patch);
5081 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
5082 atomic_read(&guc->outstanding_submission_g2h));
5083 drm_printf(p, "GuC tasklet count: %u\n",
5084 atomic_read(&sched_engine->tasklet.count));
5086 spin_lock_irqsave(&sched_engine->lock, flags);
5087 drm_printf(p, "Requests in GuC submit tasklet:\n");
/* Walk the cached rb-tree of priority lists, printing every queued request */
5088 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
5089 struct i915_priolist *pl = to_priolist(rb);
5090 struct i915_request *rq;
5092 priolist_for_each_request(rq, pl)
5093 drm_printf(p, "guc_id=%u, seqno=%llu\n",
5094 rq->context->guc_id.id,
5097 spin_unlock_irqrestore(&sched_engine->lock, flags);
5098 drm_printf(p, "\n");
/*
 * Print the context's GuC priority and the per-band request counts, from
 * GUC_CLIENT_PRIORITY_KMD_HIGH up to (not including) GUC_CLIENT_PRIORITY_NUM.
 * NOTE(review): the declaration of "i" is missing from this extract.
 */
5101 static inline void guc_log_context_priority(struct drm_printer *p,
5102 struct intel_context *ce)
5106 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
5107 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
5108 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
5109 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
5110 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
5111 i, ce->guc_state.prio_count[i]);
5113 drm_printf(p, "\n");
/*
 * Print one context's GuC-related state: guc_id, LRCA, ring head/tail
 * ("Internal" versus the value held in the LRC register state), pin count,
 * guc_id reference count and schedule state.
 * NOTE(review): the first value argument of the "LRC Head" and "LRC Tail"
 * prints is missing from this extract.
 */
5116 static inline void guc_log_context(struct drm_printer *p,
5117 struct intel_context *ce)
5119 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
5120 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
5121 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
5123 ce->lrc_reg_state[CTX_RING_HEAD]);
5124 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
5126 ce->lrc_reg_state[CTX_RING_TAIL]);
5127 drm_printf(p, "\t\tContext Pin Count: %u\n",
5128 atomic_read(&ce->pin_count));
5129 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
5130 atomic_read(&ce->guc_id.ref));
5131 drm_printf(p, "\t\tSchedule State: 0x%x\n",
5132 ce->guc_state.sched_state);
/*
 * Print the state of every context in the GuC context_lookup xarray while
 * holding the xarray lock. Parent contexts additionally get their child
 * count, the work queue head/tail/status (when wq_status is set), the
 * go/join semaphore values (when the parent no-preempt-mid-batch
 * emit_bb_start is in use) and the state of each child.
 * NOTE(review): the declaration of "i" and the closing braces are missing
 * from this extract.
 */
5135 void intel_guc_submission_print_context_info(struct intel_guc *guc,
5136 struct drm_printer *p)
5138 struct intel_context *ce;
5139 unsigned long index;
5140 unsigned long flags;
5142 xa_lock_irqsave(&guc->context_lookup, flags);
5143 xa_for_each(&guc->context_lookup, index, ce) {
5144 GEM_BUG_ON(intel_context_is_child(ce));
5146 guc_log_context(p, ce);
5147 guc_log_context_priority(p, ce);
5149 if (intel_context_is_parent(ce)) {
5150 struct intel_context *child;
5152 drm_printf(p, "\t\tNumber children: %u\n",
5153 ce->parallel.number_children);
/* The work queue pointers are only dereferenced when wq_status is non-NULL */
5155 if (ce->parallel.guc.wq_status) {
5156 drm_printf(p, "\t\tWQI Head: %u\n",
5157 READ_ONCE(*ce->parallel.guc.wq_head));
5158 drm_printf(p, "\t\tWQI Tail: %u\n",
5159 READ_ONCE(*ce->parallel.guc.wq_tail));
5160 drm_printf(p, "\t\tWQI Status: %u\n",
5161 READ_ONCE(*ce->parallel.guc.wq_status));
5164 if (ce->engine->emit_bb_start ==
5165 emit_bb_start_parent_no_preempt_mid_batch) {
5168 drm_printf(p, "\t\tChildren Go: %u\n",
5169 get_children_go_value(ce));
5170 for (i = 0; i < ce->parallel.number_children; ++i)
5171 drm_printf(p, "\t\tChildren Join: %u\n",
5172 get_children_join_value(ce, i));
5175 for_each_child(ce, child)
5176 guc_log_context(p, child);
5179 xa_unlock_irqrestore(&guc->context_lookup, flags);
5182 static inline u32 get_children_go_addr(struct intel_context *ce)
5184 GEM_BUG_ON(!intel_context_is_parent(ce));
5186 return i915_ggtt_offset(ce->state) +
5187 __get_parent_scratch_offset(ce) +
5188 offsetof(struct parent_scratch, go.semaphore);
/*
 * GGTT address of the join semaphore for child @child_index in the parent
 * scratch area of @ce. @ce must be a parent context.
 * NOTE(review): the line declaring the child_index parameter is missing from
 * this extract.
 */
5191 static inline u32 get_children_join_addr(struct intel_context *ce,
5194 GEM_BUG_ON(!intel_context_is_parent(ce));
5196 return i915_ggtt_offset(ce->state) +
5197 __get_parent_scratch_offset(ce) +
5198 offsetof(struct parent_scratch, join[child_index].semaphore);
/*
 * Semaphore values for the parent/child handshake. The *_GO_BB values are
 * used by the emit_bb_start_*_no_preempt_mid_batch() paths and the
 * *_GO_FINI_BREADCRUMB values by the fini breadcrumb paths.
 */
5201 #define PARENT_GO_BB 1
5202 #define PARENT_GO_FINI_BREADCRUMB 0
5203 #define CHILD_GO_BB 1
5204 #define CHILD_GO_FINI_BREADCRUMB 0
/*
 * Emit the batch start sequence for a parent context: a semaphore wait per
 * child on its join address (value PARENT_GO_BB), arbitration disabled, a
 * GGTT write to the children's go address, then MI_BATCH_BUFFER_START_GEN8
 * with the 64-bit batch offset. 10 + 4 * number_children dwords are reserved
 * in the ring.
 * NOTE(review): the cs/i declarations, the error check after
 * intel_ring_begin(), several emitted dwords and the return statement are
 * missing from this extract.
 */
5205 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
5206 u64 offset, u32 len,
5207 const unsigned int flags)
5209 struct intel_context *ce = rq->context;
5213 GEM_BUG_ON(!intel_context_is_parent(ce));
5215 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
5219 /* Wait on children */
5220 for (i = 0; i < ce->parallel.number_children; ++i) {
5221 *cs++ = (MI_SEMAPHORE_WAIT |
5222 MI_SEMAPHORE_GLOBAL_GTT |
5224 MI_SEMAPHORE_SAD_EQ_SDD);
5225 *cs++ = PARENT_GO_BB;
5226 *cs++ = get_children_join_addr(ce, i);
5230 /* Turn off preemption */
5231 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5234 /* Tell children go */
5235 cs = gen8_emit_ggtt_write(cs,
5237 get_children_go_addr(ce),
/* Bit 8 is set in the batch start command unless I915_DISPATCH_SECURE is requested */
5241 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5242 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5243 *cs++ = lower_32_bits(offset);
5244 *cs++ = upper_32_bits(offset);
5247 intel_ring_advance(rq, cs);
/*
 * Emit the batch start sequence for a child context: a GGTT write to this
 * child's join address in the parent's scratch area, a semaphore wait on the
 * parent's go address (value CHILD_GO_BB), arbitration disabled, then
 * MI_BATCH_BUFFER_START_GEN8 with the 64-bit batch offset. 12 dwords are
 * reserved in the ring.
 * NOTE(review): the cs declaration, the error check after
 * intel_ring_begin(), several emitted dwords and the return statement are
 * missing from this extract.
 */
5252 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5253 u64 offset, u32 len,
5254 const unsigned int flags)
5256 struct intel_context *ce = rq->context;
5257 struct intel_context *parent = intel_context_to_parent(ce);
5260 GEM_BUG_ON(!intel_context_is_child(ce));
5262 cs = intel_ring_begin(rq, 12);
5267 cs = gen8_emit_ggtt_write(cs,
5269 get_children_join_addr(parent,
5270 ce->parallel.child_index),
5273 /* Wait on parent for go */
5274 *cs++ = (MI_SEMAPHORE_WAIT |
5275 MI_SEMAPHORE_GLOBAL_GTT |
5277 MI_SEMAPHORE_SAD_EQ_SDD);
5278 *cs++ = CHILD_GO_BB;
5279 *cs++ = get_children_go_addr(parent);
5282 /* Turn off preemption */
5283 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
/* Bit 8 is set in the batch start command unless I915_DISPATCH_SECURE is requested */
5286 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5287 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5288 *cs++ = lower_32_bits(offset);
5289 *cs++ = upper_32_bits(offset);
5291 intel_ring_advance(rq, cs);
/*
 * Emit the parent's half of the fini-breadcrumb handshake at @cs: a
 * semaphore wait per child on its join address (value
 * PARENT_GO_FINI_BREADCRUMB), arbitration re-enabled, then a GGTT write of
 * CHILD_GO_FINI_BREADCRUMB to the children's go address.
 * NOTE(review): the return type line, the cs parameter line, the "i"
 * declaration, some emitted dwords and the return statement are missing
 * from this extract.
 */
5297 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5300 struct intel_context *ce = rq->context;
5303 GEM_BUG_ON(!intel_context_is_parent(ce));
5305 /* Wait on children */
5306 for (i = 0; i < ce->parallel.number_children; ++i) {
5307 *cs++ = (MI_SEMAPHORE_WAIT |
5308 MI_SEMAPHORE_GLOBAL_GTT |
5310 MI_SEMAPHORE_SAD_EQ_SDD);
5311 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5312 *cs++ = get_children_join_addr(ce, i);
5316 /* Turn on preemption */
5317 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5320 /* Tell children go */
5321 cs = gen8_emit_ggtt_write(cs,
5322 CHILD_GO_FINI_BREADCRUMB,
5323 get_children_go_addr(ce),
5330 * If this is true, a submission of multi-lrc requests had an error and the
5331 * requests need to be skipped. The front end (execbuf IOCTL) should've called
5332 * i915_request_skip which squashes the BB but we still need to emit the fini
5333 * breadcrumb's seqno write. At this point we don't know how many of the
5334 * requests in the multi-lrc submission were generated so we can't do the
5335 * handshake between the parent and children (e.g. if 4 requests should be
5336 * generated but 2nd hit an error only 1 would be seen by the GuC backend).
5337 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5338 * has occurred on any of the requests in submission / relationship.
5340 static inline bool skip_handshake(struct i915_request *rq)
5342 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
/*
 * Dword count of the part of the fini breadcrumb that is emitted even when
 * the parent/child handshake is skipped; both emitters verify it with
 * GEM_BUG_ON().
 */
5345 #define NON_SKIP_LEN 6
/*
 * Emit the parent's fini breadcrumb. When skip_handshake() is set, the
 * handshake area (emit_fini_breadcrumb_dw - NON_SKIP_LEN dwords) is zeroed;
 * otherwise the handshake is emitted. The seqno GGTT write and
 * MI_USER_INTERRUPT follow in both cases, and rq->tail is updated.
 * The GEM_BUG_ON()s check that the emitted length matches NON_SKIP_LEN and
 * emit_fini_breadcrumb_dw.
 * NOTE(review): the return type line, the cs parameter line, the else line,
 * some emitted dwords/arguments and the return statement are missing from
 * this extract.
 */
5347 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5350 struct intel_context *ce = rq->context;
5351 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5352 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5354 GEM_BUG_ON(!intel_context_is_parent(ce));
5356 if (unlikely(skip_handshake(rq))) {
5358 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
5359 * the NON_SKIP_LEN comes from the length of the emits below.
5361 memset(cs, 0, sizeof(u32) *
5362 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5363 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5365 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
5368 /* Emit fini breadcrumb */
5369 before_fini_breadcrumb_user_interrupt_cs = cs;
5370 cs = gen8_emit_ggtt_write(cs,
5372 i915_request_active_timeline(rq)->hwsp_offset,
5375 /* User interrupt */
5376 *cs++ = MI_USER_INTERRUPT;
5379 /* Ensure our math for skip + emit is correct */
5380 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5382 GEM_BUG_ON(start_fini_breadcrumb_cs +
5383 ce->engine->emit_fini_breadcrumb_dw != cs);
5385 rq->tail = intel_ring_offset(rq, cs);
/*
 * Emit the child's half of the fini-breadcrumb handshake at @cs:
 * arbitration re-enabled, a GGTT write of PARENT_GO_FINI_BREADCRUMB to this
 * child's join address in the parent's scratch area, then a semaphore wait
 * on the parent's go address (value CHILD_GO_FINI_BREADCRUMB).
 * NOTE(review): the return type line, the cs parameter line, some emitted
 * dwords and the return statement are missing from this extract.
 */
5391 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5394 struct intel_context *ce = rq->context;
5395 struct intel_context *parent = intel_context_to_parent(ce);
5397 GEM_BUG_ON(!intel_context_is_child(ce));
5399 /* Turn on preemption */
5400 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5404 cs = gen8_emit_ggtt_write(cs,
5405 PARENT_GO_FINI_BREADCRUMB,
5406 get_children_join_addr(parent,
5407 ce->parallel.child_index),
5410 /* Wait on parent for go */
5411 *cs++ = (MI_SEMAPHORE_WAIT |
5412 MI_SEMAPHORE_GLOBAL_GTT |
5414 MI_SEMAPHORE_SAD_EQ_SDD);
5415 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5416 *cs++ = get_children_go_addr(parent);
/*
 * Emit the child's fini breadcrumb. When skip_handshake() is set, the
 * handshake area (emit_fini_breadcrumb_dw - NON_SKIP_LEN dwords) is zeroed;
 * otherwise the handshake is emitted. The seqno GGTT write and
 * MI_USER_INTERRUPT follow in both cases, and rq->tail is updated.
 * The GEM_BUG_ON()s check that the emitted length matches NON_SKIP_LEN and
 * emit_fini_breadcrumb_dw.
 * NOTE(review): the return type line, the cs parameter line, the else line,
 * some emitted dwords/arguments and the return statement are missing from
 * this extract.
 */
5423 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5426 struct intel_context *ce = rq->context;
5427 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5428 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5430 GEM_BUG_ON(!intel_context_is_child(ce));
5432 if (unlikely(skip_handshake(rq))) {
5434 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
5435 * the NON_SKIP_LEN comes from the length of the emits below.
5437 memset(cs, 0, sizeof(u32) *
5438 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5439 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5441 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5444 /* Emit fini breadcrumb */
5445 before_fini_breadcrumb_user_interrupt_cs = cs;
5446 cs = gen8_emit_ggtt_write(cs,
5448 i915_request_active_timeline(rq)->hwsp_offset,
5451 /* User interrupt */
5452 *cs++ = MI_USER_INTERRUPT;
5455 /* Ensure our math for skip + emit is correct */
5456 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5458 GEM_BUG_ON(start_fini_breadcrumb_cs +
5459 ce->engine->emit_fini_breadcrumb_dw != cs);
5461 rq->tail = intel_ring_offset(rq, cs);
/*
 * Allocate a GuC virtual engine spanning @count @siblings and return its
 * embedded context.
 * The base engine is initialised from siblings[0] (i915, gt, uncore) and
 * shares the GuC's sched_engine. Each sibling then contributes its mask and
 * logical_mask, and the first sibling also supplies the class, context size,
 * emit/active-request vfuncs, breadcrumbs, flags and timeslice/preempt
 * properties. A duplicate sibling or a sibling of a different class is
 * logged with guc_dbg().
 * Returns ERR_PTR(-ENOMEM) on allocation failure; the error path drops the
 * context reference and returns ERR_PTR(err).
 * NOTE(review): the n/err declarations, the allocation check, the err
 * assignments, the gotos/label and the closing braces are missing from this
 * extract.
 */
5468 static struct intel_context *
5469 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5470 unsigned long flags)
5472 struct guc_virtual_engine *ve;
5473 struct intel_guc *guc;
5477 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5479 return ERR_PTR(-ENOMEM);
5481 guc = &siblings[0]->gt->uc.guc;
5483 ve->base.i915 = siblings[0]->i915;
5484 ve->base.gt = siblings[0]->gt;
5485 ve->base.uncore = siblings[0]->uncore;
/* Placeholder identity; class, uabi_class and name are overwritten from the first sibling below */
5488 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5489 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5490 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5491 ve->base.saturated = ALL_ENGINES;
5493 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5495 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5497 ve->base.cops = &virtual_guc_context_ops;
5498 ve->base.request_alloc = guc_request_alloc;
5499 ve->base.bump_serial = virtual_guc_bump_serial;
5501 ve->base.submit_request = guc_submit_request;
5503 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5505 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
5506 ve->base.mask = VIRTUAL_ENGINES;
5508 intel_context_init(&ve->context, &ve->base);
5510 for (n = 0; n < count; n++) {
5511 struct intel_engine_cs *sibling = siblings[n];
5513 GEM_BUG_ON(!is_power_of_2(sibling->mask));
/* A sibling whose bit is already set in the mask is a duplicate */
5514 if (sibling->mask & ve->base.mask) {
5515 guc_dbg(guc, "duplicate %s entry in load balancer\n",
5521 ve->base.mask |= sibling->mask;
5522 ve->base.logical_mask |= sibling->logical_mask;
5524 if (n != 0 && ve->base.class != sibling->class) {
5525 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
5526 sibling->class, ve->base.class);
5529 } else if (n == 0) {
/* The first sibling defines the virtual engine's class, vfuncs and properties */
5530 ve->base.class = sibling->class;
5531 ve->base.uabi_class = sibling->uabi_class;
5532 snprintf(ve->base.name, sizeof(ve->base.name),
5533 "v%dx%d", ve->base.class, count);
5534 ve->base.context_size = sibling->context_size;
5536 ve->base.add_active_request =
5537 sibling->add_active_request;
5538 ve->base.remove_active_request =
5539 sibling->remove_active_request;
5540 ve->base.emit_bb_start = sibling->emit_bb_start;
5541 ve->base.emit_flush = sibling->emit_flush;
5542 ve->base.emit_init_breadcrumb =
5543 sibling->emit_init_breadcrumb;
5544 ve->base.emit_fini_breadcrumb =
5545 sibling->emit_fini_breadcrumb;
5546 ve->base.emit_fini_breadcrumb_dw =
5547 sibling->emit_fini_breadcrumb_dw;
5548 ve->base.breadcrumbs =
5549 intel_breadcrumbs_get(sibling->breadcrumbs);
5551 ve->base.flags |= sibling->flags;
5553 ve->base.props.timeslice_duration_ms =
5554 sibling->props.timeslice_duration_ms;
5555 ve->base.props.preempt_timeout_ms =
5556 sibling->props.preempt_timeout_ms;
5560 return &ve->context;
/* Error path: dropping the context reference releases the virtual engine */
5563 intel_context_put(&ve->context);
5564 return ERR_PTR(err);
/*
 * Check the physical engines in the virtual engine's mask for a non-zero
 * heartbeat_interval_ms (read with READ_ONCE()).
 * NOTE(review): the return statements are missing from this extract -
 * presumably true on the first engine with a heartbeat and false otherwise;
 * confirm.
 */
5567 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5569 struct intel_engine_cs *engine;
5570 intel_engine_mask_t tmp, mask = ve->mask;
5572 for_each_engine_masked(engine, ve->gt, mask, tmp)
5573 if (READ_ONCE(engine->props.heartbeat_interval_ms))
5579 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5580 #include "selftest_guc.c"
5581 #include "selftest_guc_multi_lrc.c"
5582 #include "selftest_guc_hangcheck.c"