drm/i915/gt: Incrementally check for rewinding
[platform/kernel/linux-rpi.git] drivers/gpu/drm/i915/gt/intel_lrc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before) we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150 #include "shmem_utils.h"
151
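/*
 * Purely illustrative sketch (not driver code) of the ELSP coalescing rule
 * described in the DOC comment above: a context may not appear twice in an
 * execution list, so queued requests that share the leading context are
 * covered by the last of them and the second port takes the next distinct
 * context (or nothing). The type and fields below are hypothetical.
 */
struct sketch_pending_request {
	struct sketch_pending_request *next;	/* singly linked submission queue */
	u64 ctx_id;				/* ID of the owning context */
};

static inline void
sketch_fill_elsp(struct sketch_pending_request *head,
		 struct sketch_pending_request *elsp[2])
{
	elsp[0] = NULL;
	elsp[1] = NULL;

	if (!head)
		return;

	/* Same-context requests at the head are subsumed by the last of them. */
	while (head->next && head->next->ctx_id == head->ctx_id)
		head = head->next;

	elsp[0] = head;
	elsp[1] = head->next;	/* a distinct context, or NULL */
}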
152 #define RING_EXECLIST_QFULL             (1 << 0x2)
153 #define RING_EXECLIST1_VALID            (1 << 0x3)
154 #define RING_EXECLIST0_VALID            (1 << 0x4)
155 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
156 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
157 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
158
159 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
160 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
161 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
162 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
163 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
164 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
165
166 #define GEN8_CTX_STATUS_COMPLETED_MASK \
167          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
168
169 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
170
171 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
172 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
173 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
174 #define GEN12_IDLE_CTX_ID               0x7FF
175 #define GEN12_CSB_CTX_VALID(csb_dw) \
176         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
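/*
 * Worked example: a CSB dword whose SW context ID field (bits 25:15) reads
 * back as 0x7FF, e.g. 0x03ff8000, carries the reserved idle ID, so
 * GEN12_CSB_CTX_VALID() evaluates to false and the entry describes no
 * valid context.
 */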
177
178 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
179 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
180
181 struct virtual_engine {
182         struct intel_engine_cs base;
183         struct intel_context context;
184
185         /*
186          * We allow only a single request through the virtual engine at a time
187          * (each request in the timeline waits for the completion fence of
188          * the previous before being submitted). By restricting ourselves to
189          * only submitting a single request, each request is placed on to a
190          * physical engine to maximise load spreading (by virtue of the late greedy
191          * scheduling -- each real engine takes the next available request
192          * upon idling).
193          */
194         struct i915_request *request;
195
196         /*
197          * We keep a rbtree of available virtual engines inside each physical
198          * engine, sorted by priority. Here we preallocate the nodes we need
199          * for the virtual engine, indexed by physical_engine->id.
200          */
201         struct ve_node {
202                 struct rb_node rb;
203                 int prio;
204         } nodes[I915_NUM_ENGINES];
205
206         /*
207          * Keep track of bonded pairs -- restrictions upon our selection
208          * of physical engines any particular request may be submitted to.
209          * If we receive a submit-fence from a master engine, we will only
210          * use one of the physical engines in sibling_mask.
211          */
212         struct ve_bond {
213                 const struct intel_engine_cs *master;
214                 intel_engine_mask_t sibling_mask;
215         } *bonds;
216         unsigned int num_bonds;
217
218         /* And finally, which physical engines this virtual engine maps onto. */
219         unsigned int num_siblings;
220         struct intel_engine_cs *siblings[];
221 };
222
223 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
224 {
225         GEM_BUG_ON(!intel_engine_is_virtual(engine));
226         return container_of(engine, struct virtual_engine, base);
227 }
228
229 static int __execlists_context_alloc(struct intel_context *ce,
230                                      struct intel_engine_cs *engine);
231
232 static void execlists_init_reg_state(u32 *reg_state,
233                                      const struct intel_context *ce,
234                                      const struct intel_engine_cs *engine,
235                                      const struct intel_ring *ring,
236                                      bool close);
237 static void
238 __execlists_update_reg_state(const struct intel_context *ce,
239                              const struct intel_engine_cs *engine,
240                              u32 head);
241
242 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
243 {
244         if (INTEL_GEN(engine->i915) >= 12)
245                 return 0x60;
246         else if (INTEL_GEN(engine->i915) >= 9)
247                 return 0x54;
248         else if (engine->class == RENDER_CLASS)
249                 return 0x58;
250         else
251                 return -1;
252 }
253
254 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
255 {
256         if (INTEL_GEN(engine->i915) >= 12)
257                 return 0x74;
258         else if (INTEL_GEN(engine->i915) >= 9)
259                 return 0x68;
260         else if (engine->class == RENDER_CLASS)
261                 return 0xd8;
262         else
263                 return -1;
264 }
265
266 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
267 {
268         if (INTEL_GEN(engine->i915) >= 12)
269                 return 0x12;
270         else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
271                 return 0x18;
272         else
273                 return -1;
274 }
275
276 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
277 {
278         int x;
279
280         x = lrc_ring_wa_bb_per_ctx(engine);
281         if (x < 0)
282                 return x;
283
284         return x + 2;
285 }
286
287 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
288 {
289         int x;
290
291         x = lrc_ring_indirect_ptr(engine);
292         if (x < 0)
293                 return x;
294
295         return x + 2;
296 }
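/*
 * Note the fixed layout relationship encoded by the helpers above: the
 * per-context WA batch buffer register pair sits at lrc_ring_wa_bb_per_ctx(),
 * the indirect context pointer two dwords later and the indirect context
 * offset two dwords after that, e.g. 0x12, 0x14 and 0x16 on Gen12.
 */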
297
298 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
299 {
300         if (engine->class != RENDER_CLASS)
301                 return -1;
302
303         if (INTEL_GEN(engine->i915) >= 12)
304                 return 0xb6;
305         else if (INTEL_GEN(engine->i915) >= 11)
306                 return 0xaa;
307         else
308                 return -1;
309 }
310
311 static u32
312 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
313 {
314         switch (INTEL_GEN(engine->i915)) {
315         default:
316                 MISSING_CASE(INTEL_GEN(engine->i915));
317                 fallthrough;
318         case 12:
319                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
320         case 11:
321                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322         case 10:
323                 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324         case 9:
325                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326         case 8:
327                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328         }
329 }
330
331 static void
332 lrc_ring_setup_indirect_ctx(u32 *regs,
333                             const struct intel_engine_cs *engine,
334                             u32 ctx_bb_ggtt_addr,
335                             u32 size)
336 {
337         GEM_BUG_ON(!size);
338         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
339         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
340         regs[lrc_ring_indirect_ptr(engine) + 1] =
341                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
342
343         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
344         regs[lrc_ring_indirect_offset(engine) + 1] =
345                 lrc_ring_indirect_offset_default(engine) << 6;
346 }
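/*
 * The pointer register above packs the GGTT address of the per-context
 * batch together with its length in cachelines in the low bits: assuming
 * the usual 64 byte cacheline, a 128 byte buffer at GGTT offset 0x1000
 * would be programmed as 0x1000 | 2.
 */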
347
348 static u32 intel_context_get_runtime(const struct intel_context *ce)
349 {
350         /*
351          * We can use either ppHWSP[16] which is recorded before the context
352          * switch (and so excludes the cost of context switches) or use the
353          * value from the context image itself, which is saved/restored earlier
354          * and so includes the cost of the save.
355          */
356         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
357 }
358
359 static void mark_eio(struct i915_request *rq)
360 {
361         if (i915_request_completed(rq))
362                 return;
363
364         GEM_BUG_ON(i915_request_signaled(rq));
365
366         i915_request_set_error_once(rq, -EIO);
367         i915_request_mark_complete(rq);
368 }
369
370 static struct i915_request *
371 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
372 {
373         struct i915_request *active = rq;
374
375         rcu_read_lock();
376         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
377                 if (i915_request_completed(rq))
378                         break;
379
380                 active = rq;
381         }
382         rcu_read_unlock();
383
384         return active;
385 }
386
387 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
388 {
389         return (i915_ggtt_offset(engine->status_page.vma) +
390                 I915_GEM_HWS_PREEMPT_ADDR);
391 }
392
393 static inline void
394 ring_set_paused(const struct intel_engine_cs *engine, int state)
395 {
396         /*
397          * We inspect HWS_PREEMPT with a semaphore inside
398          * engine->emit_fini_breadcrumb. If the dword is true,
399          * the ring is paused as the semaphore will busywait
400          * until the dword is false.
401          */
402         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
403         if (state)
404                 wmb();
405 }
406
407 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
408 {
409         return rb_entry(rb, struct i915_priolist, node);
410 }
411
412 static inline int rq_prio(const struct i915_request *rq)
413 {
414         return READ_ONCE(rq->sched.attr.priority);
415 }
416
417 static int effective_prio(const struct i915_request *rq)
418 {
419         int prio = rq_prio(rq);
420
421         /*
422          * If this request is special and must not be interrupted at any
423          * cost, so be it. Note we are only checking the most recent request
424          * in the context and so may be masking an earlier vip request. It
425          * is hoped that under the conditions where nopreempt is used, this
426          * will not matter (i.e. all requests to that context will be
427          * nopreempt for as long as desired).
428          */
429         if (i915_request_has_nopreempt(rq))
430                 prio = I915_PRIORITY_UNPREEMPTABLE;
431
432         return prio;
433 }
434
435 static int queue_prio(const struct intel_engine_execlists *execlists)
436 {
437         struct i915_priolist *p;
438         struct rb_node *rb;
439
440         rb = rb_first_cached(&execlists->queue);
441         if (!rb)
442                 return INT_MIN;
443
444         /*
445          * As the priolist[] are inverted, with the highest priority in [0],
446          * we have to flip the index value to become priority.
447          */
448         p = to_priolist(rb);
449         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
450 }
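/*
 * Example of the flip described above: with p->priority == 0 and only the
 * first (highest) sub-level set in p->used, ffs() returns 1 and the result
 * is ((0 + 1) << I915_USER_PRIORITY_SHIFT) - 1, i.e. the topmost sub-level
 * of user priority 0.
 */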
451
452 static inline bool need_preempt(const struct intel_engine_cs *engine,
453                                 const struct i915_request *rq,
454                                 struct rb_node *rb)
455 {
456         int last_prio;
457
458         if (!intel_engine_has_semaphores(engine))
459                 return false;
460
461         /*
462          * Check if the current priority hint merits a preemption attempt.
463          *
464          * We record the highest value priority we saw during rescheduling
465          * prior to this dequeue, therefore we know that if it is strictly
466          * less than the current tail of ELSP[0], we do not need to force
467          * a preempt-to-idle cycle.
468          *
469          * However, the priority hint is a mere hint that we may need to
470          * preempt. If that hint is stale or we may be trying to preempt
471          * ourselves, ignore the request.
472          *
473          * More naturally we would write
474          *      prio >= max(0, last);
475          * except that we wish to prevent triggering preemption at the same
476          * priority level: the task that is running should remain running
477          * to preserve FIFO ordering of dependencies.
478          */
479         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
480         if (engine->execlists.queue_priority_hint <= last_prio)
481                 return false;
482
483         /*
484          * Check against the first request in ELSP[1]; it will, thanks to the
485          * power of PI, be the highest priority of that context.
486          */
487         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
488             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
489                 return true;
490
491         if (rb) {
492                 struct virtual_engine *ve =
493                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
494                 bool preempt = false;
495
496                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
497                         struct i915_request *next;
498
499                         rcu_read_lock();
500                         next = READ_ONCE(ve->request);
501                         if (next)
502                                 preempt = rq_prio(next) > last_prio;
503                         rcu_read_unlock();
504                 }
505
506                 if (preempt)
507                         return preempt;
508         }
509
510         /*
511          * If the inflight context did not trigger the preemption, then maybe
512          * it was the set of queued requests? Pick the highest priority in
513          * the queue (the first active priolist) and see if it deserves to be
514          * running instead of ELSP[0].
515          *
516          * The highest priority request in the queue cannot be either
517          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
518          * context, its priority would not exceed ELSP[0] aka last_prio.
519          */
520         return queue_prio(&engine->execlists) > last_prio;
521 }
522
523 __maybe_unused static inline bool
524 assert_priority_queue(const struct i915_request *prev,
525                       const struct i915_request *next)
526 {
527         /*
528          * Without preemption, the prev may refer to the still active element
529          * which we refuse to let go.
530          *
531          * Even with preemption, there are times when we think it is better not
532          * to preempt and leave an ostensibly lower priority request in flight.
533          */
534         if (i915_request_is_active(prev))
535                 return true;
536
537         return rq_prio(prev) >= rq_prio(next);
538 }
539
540 /*
541  * The context descriptor encodes various attributes of a context,
542  * including its GTT address and some flags. Because it's fairly
543  * expensive to calculate, we'll just do it once and cache the result,
544  * which remains valid until the context is unpinned.
545  *
546  * This is what a descriptor looks like, from LSB to MSB::
547  *
548  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
549  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
550  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
551  *      bits 53-54:    mbz, reserved for use by hardware
552  *      bits 55-63:    group ID, currently unused and set to 0
553  *
554  * Starting from Gen11, the upper dword of the descriptor has a new format:
555  *
556  *      bits 32-36:    reserved
557  *      bits 37-47:    SW context ID
558  *      bits 48-53:    engine instance
559  *      bit 54:        mbz, reserved for use by hardware
560  *      bits 55-60:    SW counter
561  *      bits 61-63:    engine class
562  *
563  * engine info, SW context ID and SW counter need to form a unique number
564  * (Context ID) per lrc.
565  */
566 static u32
567 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
568 {
569         u32 desc;
570
571         desc = INTEL_LEGACY_32B_CONTEXT;
572         if (i915_vm_is_4lvl(ce->vm))
573                 desc = INTEL_LEGACY_64B_CONTEXT;
574         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
575
576         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
577         if (IS_GEN(engine->i915, 8))
578                 desc |= GEN8_CTX_L3LLC_COHERENT;
579
580         return i915_ggtt_offset(ce->state) | desc;
581 }
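/*
 * The OR above relies on ce->state being page aligned: per the descriptor
 * layout documented before this function, the LRCA occupies bits 12-31,
 * leaving the low 12 bits free for the addressing mode and GEN8_CTX_* flags.
 */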
582
583 static inline unsigned int dword_in_page(void *addr)
584 {
585         return offset_in_page(addr) / sizeof(u32);
586 }
587
588 static void set_offsets(u32 *regs,
589                         const u8 *data,
590                         const struct intel_engine_cs *engine,
591                         bool clear)
592 #define NOP(x) (BIT(7) | (x))
593 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
594 #define POSTED BIT(0)
595 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
596 #define REG16(x) \
597         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
598         (((x) >> 2) & 0x7f)
599 #define END(total_state_size) 0, (total_state_size)
600 {
601         const u32 base = engine->mmio_base;
602
603         while (*data) {
604                 u8 count, flags;
605
606                 if (*data & BIT(7)) { /* skip */
607                         count = *data++ & ~BIT(7);
608                         if (clear)
609                                 memset32(regs, MI_NOOP, count);
610                         regs += count;
611                         continue;
612                 }
613
614                 count = *data & 0x3f;
615                 flags = *data >> 6;
616                 data++;
617
618                 *regs = MI_LOAD_REGISTER_IMM(count);
619                 if (flags & POSTED)
620                         *regs |= MI_LRI_FORCE_POSTED;
621                 if (INTEL_GEN(engine->i915) >= 11)
622                         *regs |= MI_LRI_LRM_CS_MMIO;
623                 regs++;
624
625                 GEM_BUG_ON(!count);
626                 do {
627                         u32 offset = 0;
628                         u8 v;
629
630                         do {
631                                 v = *data++;
632                                 offset <<= 7;
633                                 offset |= v & ~BIT(7);
634                         } while (v & BIT(7));
635
636                         regs[0] = base + (offset << 2);
637                         if (clear)
638                                 regs[1] = 0;
639                         regs += 2;
640                 } while (--count);
641         }
642
643         if (clear) {
644                 u8 count = *++data;
645
646                 /* Clear past the tail for HW access */
647                 GEM_BUG_ON(dword_in_page(regs) > count);
648                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
649
650                 /* Close the batch; used mainly by live_lrc_layout() */
651                 *regs = MI_BATCH_BUFFER_END;
652                 if (INTEL_GEN(engine->i915) >= 10)
653                         *regs |= BIT(0);
654         }
655 }
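/*
 * Worked example of the encoding consumed above: REG16(0x244) emits the two
 * bytes 0x81, 0x11. set_offsets() sees BIT(7) set in the first byte, so it
 * accumulates offset = 0x01, shifts left by 7 and ORs in 0x11 to recover
 * 0x91, then writes base + (0x91 << 2) == base + 0x244 as the register
 * offset preceding the MI_LOAD_REGISTER_IMM payload.
 */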
656
657 static const u8 gen8_xcs_offsets[] = {
658         NOP(1),
659         LRI(11, 0),
660         REG16(0x244),
661         REG(0x034),
662         REG(0x030),
663         REG(0x038),
664         REG(0x03c),
665         REG(0x168),
666         REG(0x140),
667         REG(0x110),
668         REG(0x11c),
669         REG(0x114),
670         REG(0x118),
671
672         NOP(9),
673         LRI(9, 0),
674         REG16(0x3a8),
675         REG16(0x28c),
676         REG16(0x288),
677         REG16(0x284),
678         REG16(0x280),
679         REG16(0x27c),
680         REG16(0x278),
681         REG16(0x274),
682         REG16(0x270),
683
684         NOP(13),
685         LRI(2, 0),
686         REG16(0x200),
687         REG(0x028),
688
689         END(80)
690 };
691
692 static const u8 gen9_xcs_offsets[] = {
693         NOP(1),
694         LRI(14, POSTED),
695         REG16(0x244),
696         REG(0x034),
697         REG(0x030),
698         REG(0x038),
699         REG(0x03c),
700         REG(0x168),
701         REG(0x140),
702         REG(0x110),
703         REG(0x11c),
704         REG(0x114),
705         REG(0x118),
706         REG(0x1c0),
707         REG(0x1c4),
708         REG(0x1c8),
709
710         NOP(3),
711         LRI(9, POSTED),
712         REG16(0x3a8),
713         REG16(0x28c),
714         REG16(0x288),
715         REG16(0x284),
716         REG16(0x280),
717         REG16(0x27c),
718         REG16(0x278),
719         REG16(0x274),
720         REG16(0x270),
721
722         NOP(13),
723         LRI(1, POSTED),
724         REG16(0x200),
725
726         NOP(13),
727         LRI(44, POSTED),
728         REG(0x028),
729         REG(0x09c),
730         REG(0x0c0),
731         REG(0x178),
732         REG(0x17c),
733         REG16(0x358),
734         REG(0x170),
735         REG(0x150),
736         REG(0x154),
737         REG(0x158),
738         REG16(0x41c),
739         REG16(0x600),
740         REG16(0x604),
741         REG16(0x608),
742         REG16(0x60c),
743         REG16(0x610),
744         REG16(0x614),
745         REG16(0x618),
746         REG16(0x61c),
747         REG16(0x620),
748         REG16(0x624),
749         REG16(0x628),
750         REG16(0x62c),
751         REG16(0x630),
752         REG16(0x634),
753         REG16(0x638),
754         REG16(0x63c),
755         REG16(0x640),
756         REG16(0x644),
757         REG16(0x648),
758         REG16(0x64c),
759         REG16(0x650),
760         REG16(0x654),
761         REG16(0x658),
762         REG16(0x65c),
763         REG16(0x660),
764         REG16(0x664),
765         REG16(0x668),
766         REG16(0x66c),
767         REG16(0x670),
768         REG16(0x674),
769         REG16(0x678),
770         REG16(0x67c),
771         REG(0x068),
772
773         END(176)
774 };
775
776 static const u8 gen12_xcs_offsets[] = {
777         NOP(1),
778         LRI(13, POSTED),
779         REG16(0x244),
780         REG(0x034),
781         REG(0x030),
782         REG(0x038),
783         REG(0x03c),
784         REG(0x168),
785         REG(0x140),
786         REG(0x110),
787         REG(0x1c0),
788         REG(0x1c4),
789         REG(0x1c8),
790         REG(0x180),
791         REG16(0x2b4),
792
793         NOP(5),
794         LRI(9, POSTED),
795         REG16(0x3a8),
796         REG16(0x28c),
797         REG16(0x288),
798         REG16(0x284),
799         REG16(0x280),
800         REG16(0x27c),
801         REG16(0x278),
802         REG16(0x274),
803         REG16(0x270),
804
805         END(80)
806 };
807
808 static const u8 gen8_rcs_offsets[] = {
809         NOP(1),
810         LRI(14, POSTED),
811         REG16(0x244),
812         REG(0x034),
813         REG(0x030),
814         REG(0x038),
815         REG(0x03c),
816         REG(0x168),
817         REG(0x140),
818         REG(0x110),
819         REG(0x11c),
820         REG(0x114),
821         REG(0x118),
822         REG(0x1c0),
823         REG(0x1c4),
824         REG(0x1c8),
825
826         NOP(3),
827         LRI(9, POSTED),
828         REG16(0x3a8),
829         REG16(0x28c),
830         REG16(0x288),
831         REG16(0x284),
832         REG16(0x280),
833         REG16(0x27c),
834         REG16(0x278),
835         REG16(0x274),
836         REG16(0x270),
837
838         NOP(13),
839         LRI(1, 0),
840         REG(0x0c8),
841
842         END(80)
843 };
844
845 static const u8 gen9_rcs_offsets[] = {
846         NOP(1),
847         LRI(14, POSTED),
848         REG16(0x244),
849         REG(0x34),
850         REG(0x30),
851         REG(0x38),
852         REG(0x3c),
853         REG(0x168),
854         REG(0x140),
855         REG(0x110),
856         REG(0x11c),
857         REG(0x114),
858         REG(0x118),
859         REG(0x1c0),
860         REG(0x1c4),
861         REG(0x1c8),
862
863         NOP(3),
864         LRI(9, POSTED),
865         REG16(0x3a8),
866         REG16(0x28c),
867         REG16(0x288),
868         REG16(0x284),
869         REG16(0x280),
870         REG16(0x27c),
871         REG16(0x278),
872         REG16(0x274),
873         REG16(0x270),
874
875         NOP(13),
876         LRI(1, 0),
877         REG(0xc8),
878
879         NOP(13),
880         LRI(44, POSTED),
881         REG(0x28),
882         REG(0x9c),
883         REG(0xc0),
884         REG(0x178),
885         REG(0x17c),
886         REG16(0x358),
887         REG(0x170),
888         REG(0x150),
889         REG(0x154),
890         REG(0x158),
891         REG16(0x41c),
892         REG16(0x600),
893         REG16(0x604),
894         REG16(0x608),
895         REG16(0x60c),
896         REG16(0x610),
897         REG16(0x614),
898         REG16(0x618),
899         REG16(0x61c),
900         REG16(0x620),
901         REG16(0x624),
902         REG16(0x628),
903         REG16(0x62c),
904         REG16(0x630),
905         REG16(0x634),
906         REG16(0x638),
907         REG16(0x63c),
908         REG16(0x640),
909         REG16(0x644),
910         REG16(0x648),
911         REG16(0x64c),
912         REG16(0x650),
913         REG16(0x654),
914         REG16(0x658),
915         REG16(0x65c),
916         REG16(0x660),
917         REG16(0x664),
918         REG16(0x668),
919         REG16(0x66c),
920         REG16(0x670),
921         REG16(0x674),
922         REG16(0x678),
923         REG16(0x67c),
924         REG(0x68),
925
926         END(176)
927 };
928
929 static const u8 gen11_rcs_offsets[] = {
930         NOP(1),
931         LRI(15, POSTED),
932         REG16(0x244),
933         REG(0x034),
934         REG(0x030),
935         REG(0x038),
936         REG(0x03c),
937         REG(0x168),
938         REG(0x140),
939         REG(0x110),
940         REG(0x11c),
941         REG(0x114),
942         REG(0x118),
943         REG(0x1c0),
944         REG(0x1c4),
945         REG(0x1c8),
946         REG(0x180),
947
948         NOP(1),
949         LRI(9, POSTED),
950         REG16(0x3a8),
951         REG16(0x28c),
952         REG16(0x288),
953         REG16(0x284),
954         REG16(0x280),
955         REG16(0x27c),
956         REG16(0x278),
957         REG16(0x274),
958         REG16(0x270),
959
960         LRI(1, POSTED),
961         REG(0x1b0),
962
963         NOP(10),
964         LRI(1, 0),
965         REG(0x0c8),
966
967         END(80)
968 };
969
970 static const u8 gen12_rcs_offsets[] = {
971         NOP(1),
972         LRI(13, POSTED),
973         REG16(0x244),
974         REG(0x034),
975         REG(0x030),
976         REG(0x038),
977         REG(0x03c),
978         REG(0x168),
979         REG(0x140),
980         REG(0x110),
981         REG(0x1c0),
982         REG(0x1c4),
983         REG(0x1c8),
984         REG(0x180),
985         REG16(0x2b4),
986
987         NOP(5),
988         LRI(9, POSTED),
989         REG16(0x3a8),
990         REG16(0x28c),
991         REG16(0x288),
992         REG16(0x284),
993         REG16(0x280),
994         REG16(0x27c),
995         REG16(0x278),
996         REG16(0x274),
997         REG16(0x270),
998
999         LRI(3, POSTED),
1000         REG(0x1b0),
1001         REG16(0x5a8),
1002         REG16(0x5ac),
1003
1004         NOP(6),
1005         LRI(1, 0),
1006         REG(0x0c8),
1007         NOP(3 + 9 + 1),
1008
1009         LRI(51, POSTED),
1010         REG16(0x588),
1011         REG16(0x588),
1012         REG16(0x588),
1013         REG16(0x588),
1014         REG16(0x588),
1015         REG16(0x588),
1016         REG(0x028),
1017         REG(0x09c),
1018         REG(0x0c0),
1019         REG(0x178),
1020         REG(0x17c),
1021         REG16(0x358),
1022         REG(0x170),
1023         REG(0x150),
1024         REG(0x154),
1025         REG(0x158),
1026         REG16(0x41c),
1027         REG16(0x600),
1028         REG16(0x604),
1029         REG16(0x608),
1030         REG16(0x60c),
1031         REG16(0x610),
1032         REG16(0x614),
1033         REG16(0x618),
1034         REG16(0x61c),
1035         REG16(0x620),
1036         REG16(0x624),
1037         REG16(0x628),
1038         REG16(0x62c),
1039         REG16(0x630),
1040         REG16(0x634),
1041         REG16(0x638),
1042         REG16(0x63c),
1043         REG16(0x640),
1044         REG16(0x644),
1045         REG16(0x648),
1046         REG16(0x64c),
1047         REG16(0x650),
1048         REG16(0x654),
1049         REG16(0x658),
1050         REG16(0x65c),
1051         REG16(0x660),
1052         REG16(0x664),
1053         REG16(0x668),
1054         REG16(0x66c),
1055         REG16(0x670),
1056         REG16(0x674),
1057         REG16(0x678),
1058         REG16(0x67c),
1059         REG(0x068),
1060         REG(0x084),
1061         NOP(1),
1062
1063         END(192)
1064 };
1065
1066 #undef END
1067 #undef REG16
1068 #undef REG
1069 #undef LRI
1070 #undef NOP
1071
1072 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1073 {
1074         /*
1075          * The gen12+ lists only have the registers we program in the basic
1076          * default state. We rely on the context image using relative
1077          * addressing to automatic fixup the register state between the
1078          * addressing to automatically fix up the register state between the
1079          * physical engines of a virtual engine.
1080         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1081                    !intel_engine_has_relative_mmio(engine));
1082
1083         if (engine->class == RENDER_CLASS) {
1084                 if (INTEL_GEN(engine->i915) >= 12)
1085                         return gen12_rcs_offsets;
1086                 else if (INTEL_GEN(engine->i915) >= 11)
1087                         return gen11_rcs_offsets;
1088                 else if (INTEL_GEN(engine->i915) >= 9)
1089                         return gen9_rcs_offsets;
1090                 else
1091                         return gen8_rcs_offsets;
1092         } else {
1093                 if (INTEL_GEN(engine->i915) >= 12)
1094                         return gen12_xcs_offsets;
1095                 else if (INTEL_GEN(engine->i915) >= 9)
1096                         return gen9_xcs_offsets;
1097                 else
1098                         return gen8_xcs_offsets;
1099         }
1100 }
1101
1102 static struct i915_request *
1103 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1104 {
1105         struct i915_request *rq, *rn, *active = NULL;
1106         struct list_head *uninitialized_var(pl);
1107         int prio = I915_PRIORITY_INVALID;
1108
1109         lockdep_assert_held(&engine->active.lock);
1110
1111         list_for_each_entry_safe_reverse(rq, rn,
1112                                          &engine->active.requests,
1113                                          sched.link) {
1114                 if (i915_request_completed(rq))
1115                         continue; /* XXX */
1116
1117                 __i915_request_unsubmit(rq);
1118
1119                 /*
1120                  * Push the request back into the queue for later resubmission.
1121                  * If this request is not native to this physical engine (i.e.
1122                  * it came from a virtual source), push it back onto the virtual
1123                  * engine so that it can be moved across onto another physical
1124                  * engine as load dictates.
1125                  */
1126                 if (likely(rq->execution_mask == engine->mask)) {
1127                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1128                         if (rq_prio(rq) != prio) {
1129                                 prio = rq_prio(rq);
1130                                 pl = i915_sched_lookup_priolist(engine, prio);
1131                         }
1132                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1133
1134                         list_move(&rq->sched.link, pl);
1135                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1136
1137                         /* Check in case we roll back so far that we wrap [size/2] */
1138                         if (intel_ring_direction(rq->ring,
1139                                                  intel_ring_wrap(rq->ring,
1140                                                                  rq->tail),
1141                                                  rq->ring->tail) > 0)
1142                                 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1143
1144                         active = rq;
1145                 } else {
1146                         struct intel_engine_cs *owner = rq->context->engine;
1147
1148                         /*
1149                          * Decouple the virtual breadcrumb before moving it
1150                          * back to the virtual engine -- we don't want the
1151                          * request to complete in the background and try
1152                          * and cancel the breadcrumb on the virtual engine
1153                          * (instead of the old engine where it is linked)!
1154                          */
1155                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1156                                      &rq->fence.flags)) {
1157                                 spin_lock_nested(&rq->lock,
1158                                                  SINGLE_DEPTH_NESTING);
1159                                 i915_request_cancel_breadcrumb(rq);
1160                                 spin_unlock(&rq->lock);
1161                         }
1162                         WRITE_ONCE(rq->engine, owner);
1163                         owner->submit_request(rq);
1164                         active = NULL;
1165                 }
1166         }
1167
1168         return active;
1169 }
1170
1171 struct i915_request *
1172 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1173 {
1174         struct intel_engine_cs *engine =
1175                 container_of(execlists, typeof(*engine), execlists);
1176
1177         return __unwind_incomplete_requests(engine);
1178 }
1179
1180 static inline void
1181 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1182 {
1183         /*
1184          * Only used when GVT-g is enabled now. When GVT-g is disabled,
1185          * the compiler should eliminate this function as dead code.
1186          */
1187         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1188                 return;
1189
1190         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1191                                    status, rq);
1192 }
1193
1194 static void intel_engine_context_in(struct intel_engine_cs *engine)
1195 {
1196         unsigned long flags;
1197
1198         if (atomic_add_unless(&engine->stats.active, 1, 0))
1199                 return;
1200
1201         write_seqlock_irqsave(&engine->stats.lock, flags);
1202         if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1203                 engine->stats.start = ktime_get();
1204                 atomic_inc(&engine->stats.active);
1205         }
1206         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1207 }
1208
1209 static void intel_engine_context_out(struct intel_engine_cs *engine)
1210 {
1211         unsigned long flags;
1212
1213         GEM_BUG_ON(!atomic_read(&engine->stats.active));
1214
1215         if (atomic_add_unless(&engine->stats.active, -1, 1))
1216                 return;
1217
1218         write_seqlock_irqsave(&engine->stats.lock, flags);
1219         if (atomic_dec_and_test(&engine->stats.active)) {
1220                 engine->stats.total =
1221                         ktime_add(engine->stats.total,
1222                                   ktime_sub(ktime_get(), engine->stats.start));
1223         }
1224         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1225 }
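/*
 * Minimal sketch (not the driver's real accessor) of how the seqlock
 * protected busy-time accounting above could be read back: sample the
 * accumulated total plus the interval currently being accumulated,
 * retrying if a writer raced with us. The helper name is illustrative only.
 */
static __maybe_unused ktime_t sketch_engine_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);

		total = engine->stats.total;
		if (atomic_read(&engine->stats.active))
			total = ktime_add(total,
					  ktime_sub(ktime_get(),
						    engine->stats.start));
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}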
1226
1227 static void
1228 execlists_check_context(const struct intel_context *ce,
1229                         const struct intel_engine_cs *engine)
1230 {
1231         const struct intel_ring *ring = ce->ring;
1232         u32 *regs = ce->lrc_reg_state;
1233         bool valid = true;
1234         int x;
1235
1236         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1237                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1238                        engine->name,
1239                        regs[CTX_RING_START],
1240                        i915_ggtt_offset(ring->vma));
1241                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1242                 valid = false;
1243         }
1244
1245         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1246             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1247                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1248                        engine->name,
1249                        regs[CTX_RING_CTL],
1250                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1251                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1252                 valid = false;
1253         }
1254
1255         x = lrc_ring_mi_mode(engine);
1256         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1257                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1258                        engine->name, regs[x + 1]);
1259                 regs[x + 1] &= ~STOP_RING;
1260                 regs[x + 1] |= STOP_RING << 16;
1261                 valid = false;
1262         }
1263
1264         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1265 }
1266
1267 static void restore_default_state(struct intel_context *ce,
1268                                   struct intel_engine_cs *engine)
1269 {
1270         u32 *regs;
1271
1272         regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1273         execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1274
1275         ce->runtime.last = intel_context_get_runtime(ce);
1276 }
1277
1278 static void reset_active(struct i915_request *rq,
1279                          struct intel_engine_cs *engine)
1280 {
1281         struct intel_context * const ce = rq->context;
1282         u32 head;
1283
1284         /*
1285          * The executing context has been cancelled. We want to prevent
1286          * further execution along this context and propagate the error on
1287          * to anything depending on its results.
1288          *
1289          * In __i915_request_submit(), we apply the -EIO and remove the
1290          * requests' payloads for any banned requests. But first, we must
1291          * rewind the context back to the start of the incomplete request so
1292          * that we do not jump back into the middle of the batch.
1293          *
1294          * We preserve the breadcrumbs and semaphores of the incomplete
1295          * requests so that inter-timeline dependencies (i.e other timelines)
1296          * remain correctly ordered. And we defer to __i915_request_submit()
1297          * so that all asynchronous waits are correctly handled.
1298          */
1299         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1300                      rq->fence.context, rq->fence.seqno);
1301
1302         /* On resubmission of the active request, payload will be scrubbed */
1303         if (i915_request_completed(rq))
1304                 head = rq->tail;
1305         else
1306                 head = active_request(ce->timeline, rq)->head;
1307         head = intel_ring_wrap(ce->ring, head);
1308
1309         /* Scrub the context image to prevent replaying the previous batch */
1310         restore_default_state(ce, engine);
1311         __execlists_update_reg_state(ce, engine, head);
1312
1313         /* We've switched away, so this should be a no-op, but intent matters */
1314         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1315 }
1316
1317 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1318 {
1319 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1320         ce->runtime.num_underflow += dt < 0;
1321         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1322 #endif
1323 }
1324
1325 static void intel_context_update_runtime(struct intel_context *ce)
1326 {
1327         u32 old;
1328         s32 dt;
1329
1330         if (intel_context_is_barrier(ce))
1331                 return;
1332
1333         old = ce->runtime.last;
1334         ce->runtime.last = intel_context_get_runtime(ce);
1335         dt = ce->runtime.last - old;
1336
1337         if (unlikely(dt <= 0)) {
1338                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1339                          old, ce->runtime.last, dt);
1340                 st_update_runtime_underflow(ce, dt);
1341                 return;
1342         }
1343
1344         ewma_runtime_add(&ce->runtime.avg, dt);
1345         ce->runtime.total += dt;
1346 }
1347
1348 static inline struct intel_engine_cs *
1349 __execlists_schedule_in(struct i915_request *rq)
1350 {
1351         struct intel_engine_cs * const engine = rq->engine;
1352         struct intel_context * const ce = rq->context;
1353
1354         intel_context_get(ce);
1355
1356         if (unlikely(intel_context_is_banned(ce)))
1357                 reset_active(rq, engine);
1358
1359         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1360                 execlists_check_context(ce, engine);
1361
1362         if (ce->tag) {
1363                 /* Use a fixed tag for OA and friends */
1364                 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1365                 ce->lrc.ccid = ce->tag;
1366         } else {
1367                 /* We don't need a strict matching tag, just different values */
1368                 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1369
1370                 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1371                 clear_bit(tag - 1, &engine->context_tag);
1372                 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1373
1374                 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1375         }
1376
1377         ce->lrc.ccid |= engine->execlists.ccid;
1378
1379         __intel_gt_pm_get(engine->gt);
1380         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1381         intel_engine_context_in(engine);
1382
1383         return engine;
1384 }
1385
1386 static inline struct i915_request *
1387 execlists_schedule_in(struct i915_request *rq, int idx)
1388 {
1389         struct intel_context * const ce = rq->context;
1390         struct intel_engine_cs *old;
1391
1392         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1393         trace_i915_request_in(rq, idx);
1394
1395         old = READ_ONCE(ce->inflight);
1396         do {
1397                 if (!old) {
1398                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1399                         break;
1400                 }
1401         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1402
1403         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1404         return i915_request_get(rq);
1405 }
1406
1407 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1408 {
1409         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1410         struct i915_request *next = READ_ONCE(ve->request);
1411
1412         if (next && next->execution_mask & ~rq->execution_mask)
1413                 tasklet_schedule(&ve->base.execlists.tasklet);
1414 }
1415
1416 static inline void
1417 __execlists_schedule_out(struct i915_request *rq,
1418                          struct intel_engine_cs * const engine,
1419                          unsigned int ccid)
1420 {
1421         struct intel_context * const ce = rq->context;
1422
1423         /*
1424          * NB process_csb() is not under the engine->active.lock and hence
1425          * schedule_out can race with schedule_in meaning that we should
1426          * refrain from doing non-trivial work here.
1427          */
1428
1429         /*
1430          * If we have just completed this context, the engine may now be
1431          * idle and we want to re-enter powersaving.
1432          */
1433         if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1434             i915_request_completed(rq))
1435                 intel_engine_add_retire(engine, ce->timeline);
1436
1437         ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1438         ccid &= GEN12_MAX_CONTEXT_HW_ID;
1439         if (ccid < BITS_PER_LONG) {
1440                 GEM_BUG_ON(ccid == 0);
1441                 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1442                 set_bit(ccid - 1, &engine->context_tag);
1443         }
1444
1445         intel_context_update_runtime(ce);
1446         intel_engine_context_out(engine);
1447         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1448         intel_gt_pm_put_async(engine->gt);
1449
1450         /*
1451          * If this is part of a virtual engine, its next request may
1452          * have been blocked waiting for access to the active context.
1453          * We have to kick all the siblings again in case we need to
1454          * switch (e.g. the next request is not runnable on this
1455          * engine). Hopefully, we will already have submitted the next
1456          * request before the tasklet runs and do not need to rebuild
1457          * each virtual tree and kick everyone again.
1458          */
1459         if (ce->engine != engine)
1460                 kick_siblings(rq, ce);
1461
1462         intel_context_put(ce);
1463 }
1464
1465 static inline void
1466 execlists_schedule_out(struct i915_request *rq)
1467 {
1468         struct intel_context * const ce = rq->context;
1469         struct intel_engine_cs *cur, *old;
1470         u32 ccid;
1471
1472         trace_i915_request_out(rq);
1473
1474         ccid = rq->context->lrc.ccid;
1475         old = READ_ONCE(ce->inflight);
1476         do
1477                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1478         while (!try_cmpxchg(&ce->inflight, &old, cur));
1479         if (!cur)
1480                 __execlists_schedule_out(rq, old, ccid);
1481
1482         i915_request_put(rq);
1483 }
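/*
 * Note on the ce->inflight dance above: the pointer records the engine the
 * context is executing on, with a small count of additional in-flight
 * submissions packed into its low alignment bits (see ptr_inc() in
 * execlists_schedule_in() and ptr_unmask_bits() here). Only once that count
 * is exhausted does execlists_schedule_out() clear ce->inflight and perform
 * the real __execlists_schedule_out() teardown.
 */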
1484
1485 static u64 execlists_update_context(struct i915_request *rq)
1486 {
1487         struct intel_context *ce = rq->context;
1488         u64 desc = ce->lrc.desc;
1489         u32 tail, prev;
1490
1491         /*
1492          * WaIdleLiteRestore:bdw,skl
1493          *
1494          * We should never submit the context with the same RING_TAIL twice
1495          * just in case we submit an empty ring, which confuses the HW.
1496          *
1497          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1498          * the normal request to be able to always advance the RING_TAIL on
1499          * subsequent resubmissions (for lite restore). Should that fail us,
1500          * and we try and submit the same tail again, force the context
1501          * reload.
1502          *
1503          * If we need to return to a preempted context, we need to skip the
1504          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1505          * HW has a tendency to ignore us rewinding the TAIL to the end of
1506          * an earlier request.
1507          */
1508         GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1509         prev = rq->ring->tail;
1510         tail = intel_ring_set_tail(rq->ring, rq->tail);
1511         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1512                 desc |= CTX_DESC_FORCE_RESTORE;
1513         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1514         rq->tail = rq->wa_tail;
1515
1516         /*
1517          * Make sure the context image is complete before we submit it to HW.
1518          *
1519          * Ostensibly, writes (including the WCB) should be flushed prior to
1520          * an uncached write such as our mmio register access, but the empirical
1521          * evidence (esp. on Braswell) suggests that the WC write into memory
1522          * may not be visible to the HW prior to the completion of the UC
1523          * register write and that we may begin execution from the context
1524          * before its image is complete leading to invalid PD chasing.
1525          */
1526         wmb();
1527
1528         ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1529         return desc;
1530 }
1531
1532 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1533 {
1534         if (execlists->ctrl_reg) {
1535                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1536                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1537         } else {
1538                 writel(upper_32_bits(desc), execlists->submit_reg);
1539                 writel(lower_32_bits(desc), execlists->submit_reg);
1540         }
1541 }
1542
1543 static __maybe_unused char *
1544 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1545 {
1546         if (!rq)
1547                 return "";
1548
1549         snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1550                  prefix,
1551                  rq->context->lrc.ccid,
1552                  rq->fence.context, rq->fence.seqno,
1553                  i915_request_completed(rq) ? "!" :
1554                  i915_request_started(rq) ? "*" :
1555                  "",
1556                  rq_prio(rq));
1557
1558         return buf;
1559 }
1560
1561 static __maybe_unused void
1562 trace_ports(const struct intel_engine_execlists *execlists,
1563             const char *msg,
1564             struct i915_request * const *ports)
1565 {
1566         const struct intel_engine_cs *engine =
1567                 container_of(execlists, typeof(*engine), execlists);
1568         char __maybe_unused p0[40], p1[40];
1569
1570         if (!ports[0])
1571                 return;
1572
1573         ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1574                      dump_port(p0, sizeof(p0), "", ports[0]),
1575                      dump_port(p1, sizeof(p1), ", ", ports[1]));
1576 }
1577
1578 static inline bool
1579 reset_in_progress(const struct intel_engine_execlists *execlists)
1580 {
1581         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1582 }
1583
1584 static __maybe_unused bool
1585 assert_pending_valid(const struct intel_engine_execlists *execlists,
1586                      const char *msg)
1587 {
1588         struct intel_engine_cs *engine =
1589                 container_of(execlists, typeof(*engine), execlists);
1590         struct i915_request * const *port, *rq;
1591         struct intel_context *ce = NULL;
1592         bool sentinel = false;
1593         u32 ccid = -1;
1594
1595         trace_ports(execlists, msg, execlists->pending);
1596
1597         /* We may be messing around with the lists during reset, lalala */
1598         if (reset_in_progress(execlists))
1599                 return true;
1600
1601         if (!execlists->pending[0]) {
1602                 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1603                               engine->name);
1604                 return false;
1605         }
1606
1607         if (execlists->pending[execlists_num_ports(execlists)]) {
1608                 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1609                               engine->name, execlists_num_ports(execlists));
1610                 return false;
1611         }
1612
1613         for (port = execlists->pending; (rq = *port); port++) {
1614                 unsigned long flags;
1615                 bool ok = true;
1616
1617                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1618                 GEM_BUG_ON(!i915_request_is_active(rq));
1619
1620                 if (ce == rq->context) {
1621                         GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1622                                       engine->name,
1623                                       ce->timeline->fence_context,
1624                                       port - execlists->pending);
1625                         return false;
1626                 }
1627                 ce = rq->context;
1628
1629                 if (ccid == ce->lrc.ccid) {
1630                         GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1631                                       engine->name,
1632                                       ccid, ce->timeline->fence_context,
1633                                       port - execlists->pending);
1634                         return false;
1635                 }
1636                 ccid = ce->lrc.ccid;
1637
1638                 /*
1639                  * Sentinels are supposed to be lonely so they flush the
1640                  * current execution off the HW. Check that they are the
1641                  * only request in the pending submission.
1642                  */
1643                 if (sentinel) {
1644                         GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1645                                       engine->name,
1646                                       ce->timeline->fence_context,
1647                                       port - execlists->pending);
1648                         return false;
1649                 }
1650
1651                 sentinel = i915_request_has_sentinel(rq);
1652                 if (sentinel && port != execlists->pending) {
1653                         GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
1654                                       engine->name,
1655                                       ce->timeline->fence_context,
1656                                       port - execlists->pending);
1657                         return false;
1658                 }
1659
1660                 /* Hold tightly onto the lock to prevent concurrent retires! */
1661                 if (!spin_trylock_irqsave(&rq->lock, flags))
1662                         continue;
1663
1664                 if (i915_request_completed(rq))
1665                         goto unlock;
1666
1667                 if (i915_active_is_idle(&ce->active) &&
1668                     !intel_context_is_barrier(ce)) {
1669                         GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1670                                       engine->name,
1671                                       ce->timeline->fence_context,
1672                                       port - execlists->pending);
1673                         ok = false;
1674                         goto unlock;
1675                 }
1676
1677                 if (!i915_vma_is_pinned(ce->state)) {
1678                         GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1679                                       engine->name,
1680                                       ce->timeline->fence_context,
1681                                       port - execlists->pending);
1682                         ok = false;
1683                         goto unlock;
1684                 }
1685
1686                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1687                         GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1688                                       engine->name,
1689                                       ce->timeline->fence_context,
1690                                       port - execlists->pending);
1691                         ok = false;
1692                         goto unlock;
1693                 }
1694
1695 unlock:
1696                 spin_unlock_irqrestore(&rq->lock, flags);
1697                 if (!ok)
1698                         return false;
1699         }
1700
1701         return ce;
1702 }
1703
1704 static void execlists_submit_ports(struct intel_engine_cs *engine)
1705 {
1706         struct intel_engine_execlists *execlists = &engine->execlists;
1707         unsigned int n;
1708
1709         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1710
1711         /*
1712          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1713          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1714          * not be relinquished until the device is idle (see
1715          * i915_gem_idle_work_handler()). As a precaution, we make sure
1716          * that all ELSP are drained i.e. we have processed the CSB,
1717          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1718          */
1719         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1720
1721         /*
1722          * ELSQ note: the submit queue is not cleared after being submitted
1723          * to the HW so we need to make sure we always clean it up. This is
1724          * currently ensured by the fact that we always write the same number
1725          * of elsq entries; keep this in mind before changing the loop below.
1726          */
1727         for (n = execlists_num_ports(execlists); n--; ) {
1728                 struct i915_request *rq = execlists->pending[n];
1729
1730                 write_desc(execlists,
1731                            rq ? execlists_update_context(rq) : 0,
1732                            n);
1733         }
1734
1735         /* we need to manually load the submit queue */
1736         if (execlists->ctrl_reg)
1737                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1738 }
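/*
 * Aside (illustrative, not part of the driver): on the legacy path the ELSP
 * is a single register written four times, upper dword before lower dword
 * and the last port before port 0, so the final write triggers submission;
 * on the ELSQ path each port has its own pair of dwords and the submit
 * queue is loaded explicitly via the control register, as done above. A
 * stand-alone sketch of the legacy write order, with hypothetical names,
 * is shown (non-compiled) below.
 */
#if 0
#include <stdint.h>

/* Hypothetical MMIO write helper standing in for writel(). */
static inline void example_writel(uint32_t val, volatile uint32_t *reg)
{
	*reg = val;
}

/* Write a two-port execlist to a legacy ELSP-style register. */
static void example_submit_elsp(volatile uint32_t *elsp,
				const uint64_t desc[2])
{
	int n;

	for (n = 1; n >= 0; n--) {
		example_writel((uint32_t)(desc[n] >> 32), elsp); /* upper */
		example_writel((uint32_t)desc[n], elsp);	 /* lower */
	}
}
#endif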
1739
1740 static bool ctx_single_port_submission(const struct intel_context *ce)
1741 {
1742         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1743                 intel_context_force_single_submission(ce));
1744 }
1745
1746 static bool can_merge_ctx(const struct intel_context *prev,
1747                           const struct intel_context *next)
1748 {
1749         if (prev != next)
1750                 return false;
1751
1752         if (ctx_single_port_submission(prev))
1753                 return false;
1754
1755         return true;
1756 }
1757
1758 static unsigned long i915_request_flags(const struct i915_request *rq)
1759 {
1760         return READ_ONCE(rq->fence.flags);
1761 }
1762
1763 static bool can_merge_rq(const struct i915_request *prev,
1764                          const struct i915_request *next)
1765 {
1766         GEM_BUG_ON(prev == next);
1767         GEM_BUG_ON(!assert_priority_queue(prev, next));
1768
1769         /*
1770          * We do not submit known completed requests. Therefore if the next
1771          * request is already completed, we can pretend to merge it in
1772          * with the previous context (and we will skip updating the ELSP
1773          * and tracking). Thus hopefully keeping the ELSP full with active
1774          * contexts, despite the best efforts of preempt-to-busy to confuse
1775          * us.
1776          */
1777         if (i915_request_completed(next))
1778                 return true;
1779
1780         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1781                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1782                       BIT(I915_FENCE_FLAG_SENTINEL))))
1783                 return false;
1784
1785         if (!can_merge_ctx(prev->context, next->context))
1786                 return false;
1787
1788         GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1789         return true;
1790 }
1791
1792 static void virtual_update_register_offsets(u32 *regs,
1793                                             struct intel_engine_cs *engine)
1794 {
1795         set_offsets(regs, reg_offsets(engine), engine, false);
1796 }
1797
1798 static bool virtual_matches(const struct virtual_engine *ve,
1799                             const struct i915_request *rq,
1800                             const struct intel_engine_cs *engine)
1801 {
1802         const struct intel_engine_cs *inflight;
1803
1804         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1805                 return false;
1806
1807         /*
1808          * We track when the HW has completed saving the context image
1809          * (i.e. when we have seen the final CS event switching out of
1810          * the context) and must not overwrite the context image before
1811          * then. This restricts us to only using the active engine
1812          * while the previous virtualized request is inflight (so
1813          * we reuse the register offsets). This is a very small
1814          * hysteresis on the greedy selection algorithm.
1815          */
1816         inflight = intel_context_inflight(&ve->context);
1817         if (inflight && inflight != engine)
1818                 return false;
1819
1820         return true;
1821 }
1822
1823 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
1824 {
1825         /*
1826          * All the outstanding signals on ve->siblings[0] must have
1827          * been completed, just pending the interrupt handler. As those
1828          * signals still refer to the old sibling (via rq->engine), we must
1829          * transfer those to the old irq_worker to keep our locking
1830          * consistent.
1831          */
1832         intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
1833 }
1834
1835 #define for_each_waiter(p__, rq__) \
1836         list_for_each_entry_lockless(p__, \
1837                                      &(rq__)->sched.waiters_list, \
1838                                      wait_link)
1839
1840 #define for_each_signaler(p__, rq__) \
1841         list_for_each_entry_rcu(p__, \
1842                                 &(rq__)->sched.signalers_list, \
1843                                 signal_link)
1844
1845 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1846 {
1847         LIST_HEAD(list);
1848
1849         /*
1850          * We want to move the interrupted request to the back of
1851          * the round-robin list (i.e. its priority level), but
1852          * in doing so, we must then move all requests that were in
1853          * flight and were waiting for the interrupted request to
1854          * be run after it again.
1855          */
1856         do {
1857                 struct i915_dependency *p;
1858
1859                 GEM_BUG_ON(i915_request_is_active(rq));
1860                 list_move_tail(&rq->sched.link, pl);
1861
1862                 for_each_waiter(p, rq) {
1863                         struct i915_request *w =
1864                                 container_of(p->waiter, typeof(*w), sched);
1865
1866                         if (p->flags & I915_DEPENDENCY_WEAK)
1867                                 continue;
1868
1869                         /* Leave semaphores spinning on the other engines */
1870                         if (w->engine != rq->engine)
1871                                 continue;
1872
1873                         /* No waiter should start before its signaler */
1874                         GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1875                                    i915_request_started(w) &&
1876                                    !i915_request_completed(rq));
1877
1878                         GEM_BUG_ON(i915_request_is_active(w));
1879                         if (!i915_request_is_ready(w))
1880                                 continue;
1881
1882                         if (rq_prio(w) < rq_prio(rq))
1883                                 continue;
1884
1885                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1886                         list_move_tail(&w->sched.link, &list);
1887                 }
1888
1889                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1890         } while (rq);
1891 }
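/*
 * Aside (illustrative, not part of the driver): defer_request() above is an
 * iterative breadth-first walk -- move one request, queue up its qualifying
 * waiters on a local list, then keep consuming that list instead of
 * recursing. A reduced, stand-alone sketch of that shape, with hypothetical
 * types built on <linux/list.h>, is shown (non-compiled) below.
 */
#if 0
struct example_node {
	struct list_head link;		/* entry in a priority list */
	struct list_head waiters;	/* this node's dependants */
};

struct example_dep {
	struct list_head wait_link;	/* entry in signaler's ->waiters */
	struct example_node *waiter;	/* the dependant node */
};

static void example_defer(struct example_node *node, struct list_head *pl)
{
	LIST_HEAD(list);

	do {
		struct example_dep *d;

		/* Move the node itself to the back of its priority list. */
		list_move_tail(&node->link, pl);

		/* Collect its dependants for processing on a later pass. */
		list_for_each_entry(d, &node->waiters, wait_link)
			list_move_tail(&d->waiter->link, &list);

		node = list_first_entry_or_null(&list, typeof(*node), link);
	} while (node);
}
#endif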
1892
1893 static void defer_active(struct intel_engine_cs *engine)
1894 {
1895         struct i915_request *rq;
1896
1897         rq = __unwind_incomplete_requests(engine);
1898         if (!rq)
1899                 return;
1900
1901         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1902 }
1903
1904 static bool
1905 need_timeslice(const struct intel_engine_cs *engine,
1906                const struct i915_request *rq,
1907                const struct rb_node *rb)
1908 {
1909         int hint;
1910
1911         if (!intel_engine_has_timeslices(engine))
1912                 return false;
1913
1914         hint = engine->execlists.queue_priority_hint;
1915
1916         if (rb) {
1917                 const struct virtual_engine *ve =
1918                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1919                 const struct intel_engine_cs *inflight =
1920                         intel_context_inflight(&ve->context);
1921
1922                 if (!inflight || inflight == engine) {
1923                         struct i915_request *next;
1924
1925                         rcu_read_lock();
1926                         next = READ_ONCE(ve->request);
1927                         if (next)
1928                                 hint = max(hint, rq_prio(next));
1929                         rcu_read_unlock();
1930                 }
1931         }
1932
1933         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1934                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1935
1936         GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1937         return hint >= effective_prio(rq);
1938 }
1939
1940 static bool
1941 timeslice_yield(const struct intel_engine_execlists *el,
1942                 const struct i915_request *rq)
1943 {
1944         /*
1945          * Once bitten, forever smitten!
1946          *
1947          * If the active context ever busy-waited on a semaphore,
1948          * it will be treated as a hog until the end of its timeslice (i.e.
1949          * until it is scheduled out and replaced by a new submission,
1950          * possibly even its own lite-restore). The HW only sends an interrupt
1951          * on the first miss, and we do not know if that semaphore has been
1952          * signaled, or even if it is now stuck on another semaphore. Play
1953          * safe, yield if it might be stuck -- it will be given a fresh
1954          * timeslice in the near future.
1955          */
1956         return rq->context->lrc.ccid == READ_ONCE(el->yield);
1957 }
1958
1959 static bool
1960 timeslice_expired(const struct intel_engine_execlists *el,
1961                   const struct i915_request *rq)
1962 {
1963         return timer_expired(&el->timer) || timeslice_yield(el, rq);
1964 }
1965
1966 static int
1967 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1968 {
1969         if (list_is_last(&rq->sched.link, &engine->active.requests))
1970                 return INT_MIN;
1971
1972         return rq_prio(list_next_entry(rq, sched.link));
1973 }
1974
1975 static inline unsigned long
1976 timeslice(const struct intel_engine_cs *engine)
1977 {
1978         return READ_ONCE(engine->props.timeslice_duration_ms);
1979 }
1980
1981 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1982 {
1983         const struct intel_engine_execlists *execlists = &engine->execlists;
1984         const struct i915_request *rq = *execlists->active;
1985
1986         if (!rq || i915_request_completed(rq))
1987                 return 0;
1988
1989         if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1990                 return 0;
1991
1992         return timeslice(engine);
1993 }
1994
1995 static void set_timeslice(struct intel_engine_cs *engine)
1996 {
1997         unsigned long duration;
1998
1999         if (!intel_engine_has_timeslices(engine))
2000                 return;
2001
2002         duration = active_timeslice(engine);
2003         ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2004
2005         set_timer_ms(&engine->execlists.timer, duration);
2006 }
2007
2008 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2009 {
2010         struct intel_engine_execlists *execlists = &engine->execlists;
2011         unsigned long duration;
2012
2013         if (!intel_engine_has_timeslices(engine))
2014                 return;
2015
2016         WRITE_ONCE(execlists->switch_priority_hint, prio);
2017         if (prio == INT_MIN)
2018                 return;
2019
2020         if (timer_pending(&execlists->timer))
2021                 return;
2022
2023         duration = timeslice(engine);
2024         ENGINE_TRACE(engine,
2025                      "start timeslicing, prio:%d, interval:%lu",
2026                      prio, duration);
2027
2028         set_timer_ms(&execlists->timer, duration);
2029 }
2030
2031 static void record_preemption(struct intel_engine_execlists *execlists)
2032 {
2033         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2034 }
2035
2036 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2037                                             const struct i915_request *rq)
2038 {
2039         if (!rq)
2040                 return 0;
2041
2042         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2043         if (unlikely(intel_context_is_banned(rq->context)))
2044                 return 1;
2045
2046         return READ_ONCE(engine->props.preempt_timeout_ms);
2047 }
2048
2049 static void set_preempt_timeout(struct intel_engine_cs *engine,
2050                                 const struct i915_request *rq)
2051 {
2052         if (!intel_engine_has_preempt_reset(engine))
2053                 return;
2054
2055         set_timer_ms(&engine->execlists.preempt,
2056                      active_preempt_timeout(engine, rq));
2057 }
2058
2059 static inline void clear_ports(struct i915_request **ports, int count)
2060 {
2061         memset_p((void **)ports, NULL, count);
2062 }
2063
2064 static void execlists_dequeue(struct intel_engine_cs *engine)
2065 {
2066         struct intel_engine_execlists * const execlists = &engine->execlists;
2067         struct i915_request **port = execlists->pending;
2068         struct i915_request ** const last_port = port + execlists->port_mask;
2069         struct i915_request * const *active;
2070         struct i915_request *last;
2071         struct rb_node *rb;
2072         bool submit = false;
2073
2074         /*
2075          * Hardware submission is through 2 ports. Conceptually each port
2076          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2077          * static for a context, and unique to each, so we only execute
2078          * requests belonging to a single context from each ring. RING_HEAD
2079          * is maintained by the CS in the context image, it marks the place
2080          * where it got up to last time, and through RING_TAIL we tell the CS
2081          * where we want to execute up to this time.
2082          *
2083          * In this list the requests are in order of execution. Consecutive
2084          * requests from the same context are adjacent in the ringbuffer. We
2085          * can combine these requests into a single RING_TAIL update:
2086          *
2087          *              RING_HEAD...req1...req2
2088          *                                    ^- RING_TAIL
2089          * since to execute req2 the CS must first execute req1.
2090          *
2091          * Our goal then is to point each port to the end of a consecutive
2092          * sequence of requests as being the most optimal (fewest wake ups
2093          * and context switches) submission.
2094          */
2095
2096         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2097                 struct virtual_engine *ve =
2098                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2099                 struct i915_request *rq = READ_ONCE(ve->request);
2100
2101                 if (!rq) { /* lazily cleanup after another engine handled rq */
2102                         rb_erase_cached(rb, &execlists->virtual);
2103                         RB_CLEAR_NODE(rb);
2104                         rb = rb_first_cached(&execlists->virtual);
2105                         continue;
2106                 }
2107
2108                 if (!virtual_matches(ve, rq, engine)) {
2109                         rb = rb_next(rb);
2110                         continue;
2111                 }
2112
2113                 break;
2114         }
2115
2116         /*
2117          * If the queue is higher priority than the last
2118          * request in the currently active context, submit afresh.
2119          * We will resubmit again afterwards in case we need to split
2120          * the active context to interject the preemption request,
2121          * i.e. we will retrigger preemption following the ack in case
2122          * of trouble.
2123          */
2124         active = READ_ONCE(execlists->active);
2125
2126         /*
2127          * In theory we can skip over completed contexts that have not
2128          * yet been processed by events (as those events are in flight):
2129          *
2130          * while ((last = *active) && i915_request_completed(last))
2131          *      active++;
2132          *
2133          * However, the GPU cannot handle this as it will ultimately
2134          * find itself trying to jump back into a context it has just
2135          * completed and barf.
2136          */
2137
2138         if ((last = *active)) {
2139                 if (need_preempt(engine, last, rb)) {
2140                         if (i915_request_completed(last)) {
2141                                 tasklet_hi_schedule(&execlists->tasklet);
2142                                 return;
2143                         }
2144
2145                         ENGINE_TRACE(engine,
2146                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2147                                      last->fence.context,
2148                                      last->fence.seqno,
2149                                      last->sched.attr.priority,
2150                                      execlists->queue_priority_hint);
2151                         record_preemption(execlists);
2152
2153                         /*
2154                          * Don't let the RING_HEAD advance past the breadcrumb
2155                          * as we unwind (and until we resubmit) so that we do
2156                          * not accidentally tell it to go backwards.
2157                          */
2158                         ring_set_paused(engine, 1);
2159
2160                         /*
2161                          * Note that we have not stopped the GPU at this point,
2162                          * so we are unwinding the incomplete requests as they
2163                          * remain inflight and so by the time we do complete
2164                          * the preemption, some of the unwound requests may
2165                          * complete!
2166                          */
2167                         __unwind_incomplete_requests(engine);
2168
2169                         last = NULL;
2170                 } else if (need_timeslice(engine, last, rb) &&
2171                            timeslice_expired(execlists, last)) {
2172                         if (i915_request_completed(last)) {
2173                                 tasklet_hi_schedule(&execlists->tasklet);
2174                                 return;
2175                         }
2176
2177                         ENGINE_TRACE(engine,
2178                                      "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2179                                      last->fence.context,
2180                                      last->fence.seqno,
2181                                      last->sched.attr.priority,
2182                                      execlists->queue_priority_hint,
2183                                      yesno(timeslice_yield(execlists, last)));
2184
2185                         ring_set_paused(engine, 1);
2186                         defer_active(engine);
2187
2188                         /*
2189                          * Unlike for preemption, if we rewind and continue
2190                          * executing the same context as previously active,
2191                          * the order of execution will remain the same and
2192                          * the tail will only advance. We do not need to
2193                          * force a full context restore, as a lite-restore
2194                          * is sufficient to resample the monotonic TAIL.
2195                          *
2196                          * If we switch to any other context, similarly we
2197                          * will not rewind TAIL of current context, and
2198                          * will not rewind the TAIL of the current context, and
2199                          * us to later continue executing the same request.
2200                          */
2201                         last = NULL;
2202                 } else {
2203                         /*
2204                          * Otherwise if we already have a request pending
2205                          * for execution after the current one, we can
2206                          * just wait until the next CS event before
2207                          * queuing more. In either case we will force a
2208                          * lite-restore preemption event, but if we wait
2209                          * we hopefully coalesce several updates into a single
2210                          * submission.
2211                          */
2212                         if (!list_is_last(&last->sched.link,
2213                                           &engine->active.requests)) {
2214                                 /*
2215                                  * Even if ELSP[1] is occupied and not worthy
2216                                  * of timeslices, our queue might be.
2217                                  */
2218                                 start_timeslice(engine, queue_prio(execlists));
2219                                 return;
2220                         }
2221                 }
2222         }
2223
2224         while (rb) { /* XXX virtual is always taking precedence */
2225                 struct virtual_engine *ve =
2226                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2227                 struct i915_request *rq;
2228
2229                 spin_lock(&ve->base.active.lock);
2230
2231                 rq = ve->request;
2232                 if (unlikely(!rq)) { /* lost the race to a sibling */
2233                         spin_unlock(&ve->base.active.lock);
2234                         rb_erase_cached(rb, &execlists->virtual);
2235                         RB_CLEAR_NODE(rb);
2236                         rb = rb_first_cached(&execlists->virtual);
2237                         continue;
2238                 }
2239
2240                 GEM_BUG_ON(rq != ve->request);
2241                 GEM_BUG_ON(rq->engine != &ve->base);
2242                 GEM_BUG_ON(rq->context != &ve->context);
2243
2244                 if (rq_prio(rq) >= queue_prio(execlists)) {
2245                         if (!virtual_matches(ve, rq, engine)) {
2246                                 spin_unlock(&ve->base.active.lock);
2247                                 rb = rb_next(rb);
2248                                 continue;
2249                         }
2250
2251                         if (last && !can_merge_rq(last, rq)) {
2252                                 spin_unlock(&ve->base.active.lock);
2253                                 start_timeslice(engine, rq_prio(rq));
2254                                 return; /* leave this for another sibling */
2255                         }
2256
2257                         ENGINE_TRACE(engine,
2258                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
2259                                      rq->fence.context,
2260                                      rq->fence.seqno,
2261                                      i915_request_completed(rq) ? "!" :
2262                                      i915_request_started(rq) ? "*" :
2263                                      "",
2264                                      yesno(engine != ve->siblings[0]));
2265
2266                         WRITE_ONCE(ve->request, NULL);
2267                         WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2268                                    INT_MIN);
2269                         rb_erase_cached(rb, &execlists->virtual);
2270                         RB_CLEAR_NODE(rb);
2271
2272                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2273                         WRITE_ONCE(rq->engine, engine);
2274
2275                         if (engine != ve->siblings[0]) {
2276                                 u32 *regs = ve->context.lrc_reg_state;
2277                                 unsigned int n;
2278
2279                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2280
2281                                 if (!intel_engine_has_relative_mmio(engine))
2282                                         virtual_update_register_offsets(regs,
2283                                                                         engine);
2284
2285                                 if (!list_empty(&ve->context.signals))
2286                                         virtual_xfer_breadcrumbs(ve);
2287
2288                                 /*
2289                                  * Move the bound engine to the top of the list
2290                                  * for future execution. We then kick this
2291                                  * tasklet first before checking others, so that
2292                                  * we preferentially reuse this set of bound
2293                                  * registers.
2294                                  */
2295                                 for (n = 1; n < ve->num_siblings; n++) {
2296                                         if (ve->siblings[n] == engine) {
2297                                                 swap(ve->siblings[n],
2298                                                      ve->siblings[0]);
2299                                                 break;
2300                                         }
2301                                 }
2302
2303                                 GEM_BUG_ON(ve->siblings[0] != engine);
2304                         }
2305
2306                         if (__i915_request_submit(rq)) {
2307                                 submit = true;
2308                                 last = rq;
2309                         }
2310                         i915_request_put(rq);
2311
2312                         /*
2313                          * Hmm, we have a bunch of virtual engine requests,
2314                          * but the first one was already completed (thanks
2315                          * preempt-to-busy!). Keep looking at the veng queue
2316                          * until we have no more relevant requests (i.e.
2317                          * the normal submit queue has higher priority).
2318                          */
2319                         if (!submit) {
2320                                 spin_unlock(&ve->base.active.lock);
2321                                 rb = rb_first_cached(&execlists->virtual);
2322                                 continue;
2323                         }
2324                 }
2325
2326                 spin_unlock(&ve->base.active.lock);
2327                 break;
2328         }
2329
2330         while ((rb = rb_first_cached(&execlists->queue))) {
2331                 struct i915_priolist *p = to_priolist(rb);
2332                 struct i915_request *rq, *rn;
2333                 int i;
2334
2335                 priolist_for_each_request_consume(rq, rn, p, i) {
2336                         bool merge = true;
2337
2338                         /*
2339                          * Can we combine this request with the current port?
2340                          * It has to be the same context/ringbuffer and not
2341                          * have any exceptions (e.g. GVT saying never to
2342                          * combine contexts).
2343                          *
2344                          * If we can combine the requests, we can execute both
2345                          * by updating the RING_TAIL to point to the end of the
2346                          * second request, and so we never need to tell the
2347                          * hardware about the first.
2348                          */
2349                         if (last && !can_merge_rq(last, rq)) {
2350                                 /*
2351                                  * If we are on the second port and cannot
2352                                  * combine this request with the last, then we
2353                                  * are done.
2354                                  */
2355                                 if (port == last_port)
2356                                         goto done;
2357
2358                                 /*
2359                                  * We must not populate both ELSP[] with the
2360                                  * same LRCA, i.e. we must submit 2 different
2361                                  * contexts if we submit 2 ELSP.
2362                                  */
2363                                 if (last->context == rq->context)
2364                                         goto done;
2365
2366                                 if (i915_request_has_sentinel(last))
2367                                         goto done;
2368
2369                                 /*
2370                                  * If GVT overrides us we only ever submit
2371                                  * port[0], leaving port[1] empty. Note that we
2372                                  * also have to be careful that we don't queue
2373                                  * the same context (even though a different
2374                                  * request) to the second port.
2375                                  */
2376                                 if (ctx_single_port_submission(last->context) ||
2377                                     ctx_single_port_submission(rq->context))
2378                                         goto done;
2379
2380                                 merge = false;
2381                         }
2382
2383                         if (__i915_request_submit(rq)) {
2384                                 if (!merge) {
2385                                         *port = execlists_schedule_in(last, port - execlists->pending);
2386                                         port++;
2387                                         last = NULL;
2388                                 }
2389
2390                                 GEM_BUG_ON(last &&
2391                                            !can_merge_ctx(last->context,
2392                                                           rq->context));
2393                                 GEM_BUG_ON(last &&
2394                                            i915_seqno_passed(last->fence.seqno,
2395                                                              rq->fence.seqno));
2396
2397                                 submit = true;
2398                                 last = rq;
2399                         }
2400                 }
2401
2402                 rb_erase_cached(&p->node, &execlists->queue);
2403                 i915_priolist_free(p);
2404         }
2405
2406 done:
2407         /*
2408          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2409          *
2410          * We choose the priority hint such that if we add a request of greater
2411          * priority than this, we kick the submission tasklet to decide on
2412          * the right order of submitting the requests to hardware. We must
2413          * also be prepared to reorder requests as they are in-flight on the
2414          * HW. We derive the priority hint then as the first "hole" in
2415          * the HW submission ports and if there are no available slots,
2416          * the priority of the lowest executing request, i.e. last.
2417          *
2418          * When we do receive a higher priority request ready to run from the
2419          * user, see queue_request(), the priority hint is bumped to that
2420          * request triggering preemption on the next dequeue (or subsequent
2421          * interrupt for secondary ports).
2422          */
2423         execlists->queue_priority_hint = queue_prio(execlists);
2424
2425         if (submit) {
2426                 *port = execlists_schedule_in(last, port - execlists->pending);
2427                 execlists->switch_priority_hint =
2428                         switch_prio(engine, *execlists->pending);
2429
2430                 /*
2431                  * Skip if we ended up with exactly the same set of requests,
2432                  * e.g. trying to timeslice a pair of ordered contexts
2433                  */
2434                 if (!memcmp(active, execlists->pending,
2435                             (port - execlists->pending + 1) * sizeof(*port))) {
2436                         do
2437                                 execlists_schedule_out(fetch_and_zero(port));
2438                         while (port-- != execlists->pending);
2439
2440                         goto skip_submit;
2441                 }
2442                 clear_ports(port + 1, last_port - port);
2443
2444                 WRITE_ONCE(execlists->yield, -1);
2445                 set_preempt_timeout(engine, *active);
2446                 execlists_submit_ports(engine);
2447         } else {
2448 skip_submit:
2449                 ring_set_paused(engine, 0);
2450         }
2451 }
2452
2453 static void
2454 cancel_port_requests(struct intel_engine_execlists * const execlists)
2455 {
2456         struct i915_request * const *port;
2457
2458         for (port = execlists->pending; *port; port++)
2459                 execlists_schedule_out(*port);
2460         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2461
2462         /* Mark the end of active before we overwrite *active */
2463         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2464                 execlists_schedule_out(*port);
2465         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2466
2467         smp_wmb(); /* complete the seqlock for execlists_active() */
2468         WRITE_ONCE(execlists->active, execlists->inflight);
2469 }
2470
2471 static inline void
2472 invalidate_csb_entries(const u32 *first, const u32 *last)
2473 {
2474         clflush((void *)first);
2475         clflush((void *)last);
2476 }
2477
2478 /*
2479  * Starting with Gen12, the status has a new format:
2480  *
2481  *     bit  0:     switched to new queue
2482  *     bit  1:     reserved
2483  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2484  *                 switch detail is set to "wait on semaphore"
2485  *     bits 3-5:   engine class
2486  *     bits 6-11:  engine instance
2487  *     bits 12-14: reserved
2488  *     bits 15-25: sw context id of the lrc the GT switched to
2489  *     bits 26-31: sw counter of the lrc the GT switched to
2490  *     bits 32-35: context switch detail
2491  *                  - 0: ctx complete
2492  *                  - 1: wait on sync flip
2493  *                  - 2: wait on vblank
2494  *                  - 3: wait on scanline
2495  *                  - 4: wait on semaphore
2496  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2497  *                       WAIT_FOR_EVENT)
2498  *     bit  36:    reserved
2499  *     bits 37-43: wait detail (for switch detail 1 to 4)
2500  *     bits 44-46: reserved
2501  *     bits 47-57: sw context id of the lrc the GT switched away from
2502  *     bits 58-63: sw counter of the lrc the GT switched away from
2503  */
2504 static inline bool
2505 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2506 {
2507         u32 lower_dw = csb[0];
2508         u32 upper_dw = csb[1];
2509         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2510         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2511         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2512
2513         /*
2514          * The context switch detail is not guaranteed to be 5 when a preemption
2515          * occurs, so we can't just check for that. The check below works for
2516          * all the cases we care about, including preemptions of WAIT
2517          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2518          * would require some extra handling, but we don't support that.
2519          */
2520         if (!ctx_away_valid || new_queue) {
2521                 GEM_BUG_ON(!ctx_to_valid);
2522                 return true;
2523         }
2524
2525         /*
2526          * switch detail = 5 is covered by the case above and we do not expect a
2527          * context switch on an unsuccessful wait instruction since we always
2528          * use polling mode.
2529          */
2530         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2531         return false;
2532 }
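/*
 * Aside (illustrative, not part of the driver): with csb[0] holding bits
 * 0-31 and csb[1] holding bits 32-63 of the status described above, the
 * documented fields can be pulled out with simple shift-and-mask helpers.
 * The macros below are hypothetical sketches, not the GEN12_* definitions
 * used by gen12_csb_parse().
 */
#if 0
#define EXAMPLE_CSB_TO_CTX_ID(lower)	 (((lower) >> 15) & 0x7ff) /* bits 15-25 */
#define EXAMPLE_CSB_SWITCH_DETAIL(upper) ((upper) & 0xf)	   /* bits 32-35 */
#define EXAMPLE_CSB_AWAY_CTX_ID(upper)	 (((upper) >> 15) & 0x7ff) /* bits 47-57 */
#endif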
2533
2534 static inline bool
2535 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2536 {
2537         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2538 }
2539
2540 static void process_csb(struct intel_engine_cs *engine)
2541 {
2542         struct intel_engine_execlists * const execlists = &engine->execlists;
2543         const u32 * const buf = execlists->csb_status;
2544         const u8 num_entries = execlists->csb_size;
2545         u8 head, tail;
2546
2547         /*
2548          * As we modify our execlists state tracking we require exclusive
2549          * access. Either we are inside the tasklet, or the tasklet is disabled
2550          * and we assume that is only inside the reset paths and so serialised.
2551          */
2552         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2553                    !reset_in_progress(execlists));
2554         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2555
2556         /*
2557          * Note that csb_write, csb_status may be either in HWSP or mmio.
2558          * When reading from the csb_write mmio register, we have to be
2559          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2560          * the low 4 bits. As it happens we know the next 4 bits are always
2561          * zero and so we can simply mask off the low u8 of the register
2562          * and treat it identically to reading from the HWSP (without having
2563          * to use explicit shifting and masking, and probably bifurcating
2564          * the code to handle the legacy mmio read).
2565          */
2566         head = execlists->csb_head;
2567         tail = READ_ONCE(*execlists->csb_write);
2568         if (unlikely(head == tail))
2569                 return;
2570
2571         /*
2572          * Hopefully paired with a wmb() in HW!
2573          *
2574          * We must complete the read of the write pointer before any reads
2575          * from the CSB, so that we do not see stale values. Without an rmb
2576          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2577          * we perform the READ_ONCE(*csb_write).
2578          */
2579         rmb();
2580
2581         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2582         do {
2583                 bool promote;
2584
2585                 if (++head == num_entries)
2586                         head = 0;
2587
2588                 /*
2589                  * We are flying near dragons again.
2590                  *
2591                  * We hold a reference to the request in execlist_port[]
2592                  * but no more than that. We are operating in softirq
2593                  * context and so cannot hold any mutex or sleep. That
2594                  * prevents us stopping the requests we are processing
2595                  * in port[] from being retired simultaneously (the
2596                  * breadcrumb will be complete before we see the
2597                  * context-switch). As we only hold the reference to the
2598                  * request, any pointer chasing underneath the request
2599                  * is subject to a potential use-after-free. Thus we
2600                  * store all of the bookkeeping within port[] as
2601                  * required, and avoid using unguarded pointers beneath
2602                  * request itself. The same applies to the atomic
2603                  * status notifier.
2604                  */
2605
2606                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2607                              head, buf[2 * head + 0], buf[2 * head + 1]);
2608
2609                 if (INTEL_GEN(engine->i915) >= 12)
2610                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2611                 else
2612                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2613                 if (promote) {
2614                         struct i915_request * const *old = execlists->active;
2615
2616                         ring_set_paused(engine, 0);
2617
2618                         /* Point active to the new ELSP; prevent overwriting */
2619                         WRITE_ONCE(execlists->active, execlists->pending);
2620                         smp_wmb(); /* notify execlists_active() */
2621
2622                         /* cancel old inflight, prepare for switch */
2623                         trace_ports(execlists, "preempted", old);
2624                         while (*old)
2625                                 execlists_schedule_out(*old++);
2626
2627                         /* switch pending to inflight */
2628                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2629                         memcpy(execlists->inflight,
2630                                execlists->pending,
2631                                execlists_num_ports(execlists) *
2632                                sizeof(*execlists->pending));
2633                         smp_wmb(); /* complete the seqlock */
2634                         WRITE_ONCE(execlists->active, execlists->inflight);
2635
2636                         WRITE_ONCE(execlists->pending[0], NULL);
2637                 } else {
2638                         GEM_BUG_ON(!*execlists->active);
2639
2640                         /* port0 completed, advanced to port1 */
2641                         trace_ports(execlists, "completed", execlists->active);
2642
2643                         /*
2644                          * We rely on the hardware being strongly
2645                          * ordered, that the breadcrumb write is
2646                          * coherent (visible from the CPU) before the
2647                          * user interrupt is processed. One might assume
2648                          * that the breadcrumb write, being before both the
2649                          * user interrupt and the CS event for the context
2650                          * switch, would therefore be visible before the CS
2651                          * event itself...
2652                          */
2653                         if (GEM_SHOW_DEBUG() &&
2654                             !i915_request_completed(*execlists->active)) {
2655                                 struct i915_request *rq = *execlists->active;
2656                                 const u32 *regs __maybe_unused =
2657                                         rq->context->lrc_reg_state;
2658
2659                                 ENGINE_TRACE(engine,
2660                                              "context completed before request!\n");
2661                                 ENGINE_TRACE(engine,
2662                                              "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2663                                              ENGINE_READ(engine, RING_START),
2664                                              ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2665                                              ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2666                                              ENGINE_READ(engine, RING_CTL),
2667                                              ENGINE_READ(engine, RING_MI_MODE));
2668                                 ENGINE_TRACE(engine,
2669                                              "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2670                                              i915_ggtt_offset(rq->ring->vma),
2671                                              rq->head, rq->tail,
2672                                              rq->fence.context,
2673                                              lower_32_bits(rq->fence.seqno),
2674                                              hwsp_seqno(rq));
2675                                 ENGINE_TRACE(engine,
2676                                              "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2677                                              regs[CTX_RING_START],
2678                                              regs[CTX_RING_HEAD],
2679                                              regs[CTX_RING_TAIL]);
2680                         }
2681
2682                         execlists_schedule_out(*execlists->active++);
2683
2684                         GEM_BUG_ON(execlists->active - execlists->inflight >
2685                                    execlists_num_ports(execlists));
2686                 }
2687         } while (head != tail);
2688
2689         execlists->csb_head = head;
2690         set_timeslice(engine);
2691
2692         /*
2693          * Gen11 has proven to fail wrt the global observation point between
2694          * entry and tail update, failing on the ordering and thus
2695          * we see an old entry in the context status buffer.
2696          *
2697          * Forcibly evict out entries for the next gpu csb update,
2698          * to increase the odds that we get fresh entries with non-
2699          * working hardware. The cost of doing so comes out mostly in
2700          * the wash as hardware, working or not, will need to do the
2701          * invalidation before.
2702          */
2703         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2704 }
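/*
 * Aside (illustrative, not part of the driver): the CSB walk above follows
 * the usual single-producer ring consumption pattern -- sample the write
 * pointer once, order that read before reading any entries, then advance
 * the cached head modulo the ring size until it catches up. A reduced
 * stand-alone sketch, with hypothetical names, is shown (non-compiled)
 * below.
 */
#if 0
#include <stdint.h>

struct example_csb {
	const uint32_t *entries;	/* status dwords written by HW */
	const uint32_t *write_ptr;	/* HW-updated tail index */
	unsigned int num_entries;
	unsigned int head;		/* our cached read position */
};

static void example_consume_entry(uint32_t status)
{
	/* ... act on one status entry ... */
	(void)status;
}

static void example_drain_csb(struct example_csb *csb)
{
	unsigned int head = csb->head;
	unsigned int tail = READ_ONCE(*csb->write_ptr);

	if (head == tail)
		return;

	rmb(); /* do not read entries speculatively before the tail */

	do {
		if (++head == csb->num_entries)
			head = 0;

		example_consume_entry(csb->entries[head]);
	} while (head != tail);

	csb->head = head;
}
#endif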
2705
2706 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2707 {
2708         lockdep_assert_held(&engine->active.lock);
2709         if (!READ_ONCE(engine->execlists.pending[0])) {
2710                 rcu_read_lock(); /* protect peeking at execlists->active */
2711                 execlists_dequeue(engine);
2712                 rcu_read_unlock();
2713         }
2714 }
2715
2716 static void __execlists_hold(struct i915_request *rq)
2717 {
2718         LIST_HEAD(list);
2719
2720         do {
2721                 struct i915_dependency *p;
2722
2723                 if (i915_request_is_active(rq))
2724                         __i915_request_unsubmit(rq);
2725
2726                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2727                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2728                 i915_request_set_hold(rq);
2729                 RQ_TRACE(rq, "on hold\n");
2730
2731                 for_each_waiter(p, rq) {
2732                         struct i915_request *w =
2733                                 container_of(p->waiter, typeof(*w), sched);
2734
2735                         /* Leave semaphores spinning on the other engines */
2736                         if (w->engine != rq->engine)
2737                                 continue;
2738
2739                         if (!i915_request_is_ready(w))
2740                                 continue;
2741
2742                         if (i915_request_completed(w))
2743                                 continue;
2744
2745                         if (i915_request_on_hold(w))
2746                                 continue;
2747
2748                         list_move_tail(&w->sched.link, &list);
2749                 }
2750
2751                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2752         } while (rq);
2753 }
2754
2755 static bool execlists_hold(struct intel_engine_cs *engine,
2756                            struct i915_request *rq)
2757 {
2758         spin_lock_irq(&engine->active.lock);
2759
2760         if (i915_request_completed(rq)) { /* too late! */
2761                 rq = NULL;
2762                 goto unlock;
2763         }
2764
2765         if (rq->engine != engine) { /* preempted virtual engine */
2766                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2767
2768                 /*
2769                  * intel_context_inflight() is only protected by virtue
2770                  * of process_csb() being called only by the tasklet (or
2771                  * directly from inside reset while the tasklet is suspended).
2772                  * Assert that neither of those are allowed to run while we
2773                  * poke at the request queues.
2774                  */
2775                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2776
2777                 /*
2778                  * An unsubmitted request along a virtual engine will
2779                  * remain on the active (this) engine until we are able
2780                  * to process the context switch away (and so mark the
2781                  * context as no longer in flight). That cannot have happened
2782                  * yet, otherwise we would not be hanging!
2783                  */
2784                 spin_lock(&ve->base.active.lock);
2785                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2786                 GEM_BUG_ON(ve->request != rq);
2787                 ve->request = NULL;
2788                 spin_unlock(&ve->base.active.lock);
2789                 i915_request_put(rq);
2790
2791                 rq->engine = engine;
2792         }
2793
2794         /*
2795          * Transfer this request onto the hold queue to prevent it
2796          * being resubmitted to HW (and potentially completed) before we have
2797          * released it. Since we may have already submitted following
2798          * requests, we need to remove those as well.
2799          */
2800         GEM_BUG_ON(i915_request_on_hold(rq));
2801         GEM_BUG_ON(rq->engine != engine);
2802         __execlists_hold(rq);
2803         GEM_BUG_ON(list_empty(&engine->active.hold));
2804
2805 unlock:
2806         spin_unlock_irq(&engine->active.lock);
2807         return rq;
2808 }
2809
2810 static bool hold_request(const struct i915_request *rq)
2811 {
2812         struct i915_dependency *p;
2813         bool result = false;
2814
2815         /*
2816          * If one of our ancestors is on hold, we must also be on hold,
2817          * otherwise we will bypass it and execute before it.
2818          */
2819         rcu_read_lock();
2820         for_each_signaler(p, rq) {
2821                 const struct i915_request *s =
2822                         container_of(p->signaler, typeof(*s), sched);
2823
2824                 if (s->engine != rq->engine)
2825                         continue;
2826
2827                 result = i915_request_on_hold(s);
2828                 if (result)
2829                         break;
2830         }
2831         rcu_read_unlock();
2832
2833         return result;
2834 }
2835
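/*
 * Return the request from the hold list back to the priority queue, along
 * with any of its waiters on this engine that were suspended with it and
 * have no other ancestors still on hold.
 */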
2836 static void __execlists_unhold(struct i915_request *rq)
2837 {
2838         LIST_HEAD(list);
2839
2840         do {
2841                 struct i915_dependency *p;
2842
2843                 RQ_TRACE(rq, "hold release\n");
2844
2845                 GEM_BUG_ON(!i915_request_on_hold(rq));
2846                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2847
2848                 i915_request_clear_hold(rq);
2849                 list_move_tail(&rq->sched.link,
2850                                i915_sched_lookup_priolist(rq->engine,
2851                                                           rq_prio(rq)));
2852                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2853
2854                 /* Also release any children on this engine that are ready */
2855                 for_each_waiter(p, rq) {
2856                         struct i915_request *w =
2857                                 container_of(p->waiter, typeof(*w), sched);
2858
2859                         /* Propagate any change in error status */
2860                         if (rq->fence.error)
2861                                 i915_request_set_error_once(w, rq->fence.error);
2862
2863                         if (w->engine != rq->engine)
2864                                 continue;
2865
2866                         if (!i915_request_on_hold(w))
2867                                 continue;
2868
2869                         /* Check that no other parents are also on hold */
2870                         if (hold_request(w))
2871                                 continue;
2872
2873                         list_move_tail(&w->sched.link, &list);
2874                 }
2875
2876                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2877         } while (rq);
2878 }
2879
2880 static void execlists_unhold(struct intel_engine_cs *engine,
2881                              struct i915_request *rq)
2882 {
2883         spin_lock_irq(&engine->active.lock);
2884
2885         /*
2886          * Move this request back to the priority queue, and all of its
2887          * children and grandchildren that were suspended along with it.
2888          */
2889         __execlists_unhold(rq);
2890
2891         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2892                 engine->execlists.queue_priority_hint = rq_prio(rq);
2893                 tasklet_hi_schedule(&engine->execlists.tasklet);
2894         }
2895
2896         spin_unlock_irq(&engine->active.lock);
2897 }
2898
2899 struct execlists_capture {
2900         struct work_struct work;
2901         struct i915_request *rq;
2902         struct i915_gpu_coredump *error;
2903 };
2904
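/*
 * Deferred worker for error capture: compress the vma attached to the held
 * request, publish the coredump, then release the request (and all that
 * depend upon it) back for execution and signaling.
 */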
2905 static void execlists_capture_work(struct work_struct *work)
2906 {
2907         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2908         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2909         struct intel_engine_cs *engine = cap->rq->engine;
2910         struct intel_gt_coredump *gt = cap->error->gt;
2911         struct intel_engine_capture_vma *vma;
2912
2913         /* Compress all the objects attached to the request, slow! */
2914         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2915         if (vma) {
2916                 struct i915_vma_compress *compress =
2917                         i915_vma_capture_prepare(gt);
2918
2919                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2920                 i915_vma_capture_finish(gt, compress);
2921         }
2922
2923         gt->simulated = gt->engine->simulated;
2924         cap->error->simulated = gt->simulated;
2925
2926         /* Publish the error state, and announce it to the world */
2927         i915_error_state_store(cap->error);
2928         i915_gpu_coredump_put(cap->error);
2929
2930         /* Return this request and all that depend upon it for signaling */
2931         execlists_unhold(engine, cap->rq);
2932         i915_request_put(cap->rq);
2933
2934         kfree(cap);
2935 }
2936
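/*
 * Allocate the skeleton of the coredump using GFP_ATOMIC, as we are called
 * from the tasklet (softirq) just before forcing the engine reset.
 */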
2937 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2938 {
2939         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2940         struct execlists_capture *cap;
2941
2942         cap = kmalloc(sizeof(*cap), gfp);
2943         if (!cap)
2944                 return NULL;
2945
2946         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2947         if (!cap->error)
2948                 goto err_cap;
2949
2950         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2951         if (!cap->error->gt)
2952                 goto err_gpu;
2953
2954         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2955         if (!cap->error->gt->engine)
2956                 goto err_gt;
2957
2958         return cap;
2959
2960 err_gt:
2961         kfree(cap->error->gt);
2962 err_gpu:
2963         kfree(cap->error);
2964 err_cap:
2965         kfree(cap);
2966         return NULL;
2967 }
2968
2969 static struct i915_request *
2970 active_context(struct intel_engine_cs *engine, u32 ccid)
2971 {
2972         const struct intel_engine_execlists * const el = &engine->execlists;
2973         struct i915_request * const *port, *rq;
2974
2975         /*
2976          * Use the most recent result from process_csb(), but just in case
2977          * we trigger an error (via interrupt) before the first CS event has
2978          * been written, peek at the next submission.
2979          */
2980
2981         for (port = el->active; (rq = *port); port++) {
2982                 if (rq->context->lrc.ccid == ccid) {
2983                         ENGINE_TRACE(engine,
2984                                      "ccid found at active:%zd\n",
2985                                      port - el->active);
2986                         return rq;
2987                 }
2988         }
2989
2990         for (port = el->pending; (rq = *port); port++) {
2991                 if (rq->context->lrc.ccid == ccid) {
2992                         ENGINE_TRACE(engine,
2993                                      "ccid found at pending:%zd\n",
2994                                      port - el->pending);
2995                         return rq;
2996                 }
2997         }
2998
2999         ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3000         return NULL;
3001 }
3002
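/*
 * The upper dword of the execlists status register holds the context ID
 * (CCID) of the context currently executing on the HW.
 */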
3003 static u32 active_ccid(struct intel_engine_cs *engine)
3004 {
3005         return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3006 }
3007
3008 static bool execlists_capture(struct intel_engine_cs *engine)
3009 {
3010         struct execlists_capture *cap;
3011
3012         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3013                 return true;
3014
3015         /*
3016          * We need to _quickly_ capture the engine state before we reset.
3017          * We are inside an atomic section (softirq) here and we are delaying
3018          * the forced preemption event.
3019          */
3020         cap = capture_regs(engine);
3021         if (!cap)
3022                 return true;
3023
3024         spin_lock_irq(&engine->active.lock);
3025         cap->rq = active_context(engine, active_ccid(engine));
3026         if (cap->rq) {
3027                 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3028                 cap->rq = i915_request_get_rcu(cap->rq);
3029         }
3030         spin_unlock_irq(&engine->active.lock);
3031         if (!cap->rq)
3032                 goto err_free;
3033
3034         /*
3035          * Remove the request from the execlists queue, and take ownership
3036          * of the request. We pass it to our worker who will _slowly_ compress
3037          * all the pages the _user_ requested for debugging their batch, after
3038          * which we return it to the queue for signaling.
3039          *
3040          * By removing them from the execlists queue, we also remove the
3041          * requests from being processed by __unwind_incomplete_requests()
3042          * during the intel_engine_reset(), and so they will *not* be replayed
3043          * afterwards.
3044          *
3045          * Note that because we have not yet reset the engine at this point,
3046          * it is possible that the request we have identified as being
3047          * guilty did in fact complete, and we will then hit an arbitration
3048          * point allowing the outstanding preemption to succeed. The likelihood
3049          * of that is very low (as capturing of the engine registers should be
3050          * fast enough to run inside an irq-off atomic section!), so we will
3051          * simply hold that request accountable for being non-preemptible
3052          * long enough to force the reset.
3053          */
3054         if (!execlists_hold(engine, cap->rq))
3055                 goto err_rq;
3056
3057         INIT_WORK(&cap->work, execlists_capture_work);
3058         schedule_work(&cap->work);
3059         return true;
3060
3061 err_rq:
3062         i915_request_put(cap->rq);
3063 err_free:
3064         i915_gpu_coredump_put(cap->error);
3065         kfree(cap);
3066         return false;
3067 }
3068
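/*
 * Engine-local reset after a CS error or preemption timeout: pause the
 * ring, try to capture the offending request, then reset just this engine.
 */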
3069 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3070 {
3071         const unsigned int bit = I915_RESET_ENGINE + engine->id;
3072         unsigned long *lock = &engine->gt->reset.flags;
3073
3074         if (!intel_has_reset_engine(engine->gt))
3075                 return;
3076
3077         if (test_and_set_bit(bit, lock))
3078                 return;
3079
3080         ENGINE_TRACE(engine, "reset for %s\n", msg);
3081
3082         /* Mark this tasklet as disabled to avoid waiting for it to complete */
3083         tasklet_disable_nosync(&engine->execlists.tasklet);
3084
3085         ring_set_paused(engine, 1); /* Freeze the current request in place */
3086         if (execlists_capture(engine))
3087                 intel_engine_reset(engine, msg);
3088         else
3089                 ring_set_paused(engine, 0);
3090
3091         tasklet_enable(&engine->execlists.tasklet);
3092         clear_and_wake_up_bit(bit, lock);
3093 }
3094
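/*
 * Has the preemption timer expired while the HW has still not acknowledged
 * the pending ELSP write requesting the preemption?
 */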
3095 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3096 {
3097         const struct timer_list *t = &engine->execlists.preempt;
3098
3099         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3100                 return false;
3101
3102         if (!timer_expired(t))
3103                 return false;
3104
3105         return READ_ONCE(engine->execlists.pending[0]);
3106 }
3107
3108 /*
3109  * Check the unread Context Status Buffers and manage the submission of new
3110  * contexts to the ELSP accordingly.
3111  */
3112 static void execlists_submission_tasklet(unsigned long data)
3113 {
3114         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3115         bool timeout = preempt_timeout(engine);
3116
3117         process_csb(engine);
3118
3119         if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3120                 engine->execlists.error_interrupt = 0;
3121                 if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
3122                         execlists_reset(engine, "CS error");
3123         }
3124
3125         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3126                 unsigned long flags;
3127
3128                 spin_lock_irqsave(&engine->active.lock, flags);
3129                 __execlists_submission_tasklet(engine);
3130                 spin_unlock_irqrestore(&engine->active.lock, flags);
3131
3132                 /* Recheck after serialising with direct-submission */
3133                 if (unlikely(timeout && preempt_timeout(engine)))
3134                         execlists_reset(engine, "preemption time out");
3135         }
3136 }
3137
3138 static void __execlists_kick(struct intel_engine_execlists *execlists)
3139 {
3140         /* Kick the tasklet for some interrupt coalescing and reset handling */
3141         tasklet_hi_schedule(&execlists->tasklet);
3142 }
3143
3144 #define execlists_kick(t, member) \
3145         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3146
3147 static void execlists_timeslice(struct timer_list *timer)
3148 {
3149         execlists_kick(timer, timer);
3150 }
3151
3152 static void execlists_preempt(struct timer_list *timer)
3153 {
3154         execlists_kick(timer, preempt);
3155 }
3156
3157 static void queue_request(struct intel_engine_cs *engine,
3158                           struct i915_request *rq)
3159 {
3160         GEM_BUG_ON(!list_empty(&rq->sched.link));
3161         list_add_tail(&rq->sched.link,
3162                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
3163         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3164 }
3165
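/*
 * Attempt direct submission from the request submission path: flush any
 * outstanding CSB events first (if we can grab the tasklet lock), then run
 * the dequeue step inline rather than waiting for the tasklet.
 */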
3166 static void __submit_queue_imm(struct intel_engine_cs *engine)
3167 {
3168         struct intel_engine_execlists * const execlists = &engine->execlists;
3169
3170         if (reset_in_progress(execlists))
3171                 return; /* defer until we restart the engine following reset */
3172
3173         /* Hopefully we clear execlists->pending[] to let us through */
3174         if (READ_ONCE(execlists->pending[0]) &&
3175             tasklet_trylock(&execlists->tasklet)) {
3176                 process_csb(engine);
3177                 tasklet_unlock(&execlists->tasklet);
3178         }
3179
3180         __execlists_submission_tasklet(engine);
3181 }
3182
3183 static void submit_queue(struct intel_engine_cs *engine,
3184                          const struct i915_request *rq)
3185 {
3186         struct intel_engine_execlists *execlists = &engine->execlists;
3187
3188         if (rq_prio(rq) <= execlists->queue_priority_hint)
3189                 return;
3190
3191         execlists->queue_priority_hint = rq_prio(rq);
3192         __submit_queue_imm(engine);
3193 }
3194
3195 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3196                              const struct i915_request *rq)
3197 {
3198         GEM_BUG_ON(i915_request_on_hold(rq));
3199         return !list_empty(&engine->active.hold) && hold_request(rq);
3200 }
3201
3202 static void execlists_submit_request(struct i915_request *request)
3203 {
3204         struct intel_engine_cs *engine = request->engine;
3205         unsigned long flags;
3206
3207         /* Will be called from irq-context when using foreign fences. */
3208         spin_lock_irqsave(&engine->active.lock, flags);
3209
3210         if (unlikely(ancestor_on_hold(engine, request))) {
3211                 RQ_TRACE(request, "ancestor on hold\n");
3212                 list_add_tail(&request->sched.link, &engine->active.hold);
3213                 i915_request_set_hold(request);
3214         } else {
3215                 queue_request(engine, request);
3216
3217                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3218                 GEM_BUG_ON(list_empty(&request->sched.link));
3219
3220                 submit_queue(engine, request);
3221         }
3222
3223         spin_unlock_irqrestore(&engine->active.lock, flags);
3224 }
3225
3226 static void __execlists_context_fini(struct intel_context *ce)
3227 {
3228         intel_ring_put(ce->ring);
3229         i915_vma_put(ce->state);
3230 }
3231
3232 static void execlists_context_destroy(struct kref *kref)
3233 {
3234         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3235
3236         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3237         GEM_BUG_ON(intel_context_is_pinned(ce));
3238
3239         if (ce->state)
3240                 __execlists_context_fini(ce);
3241
3242         intel_context_fini(ce);
3243         intel_context_free(ce);
3244 }
3245
3246 static void
3247 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3248 {
3249         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3250                 return;
3251
3252         vaddr += engine->context_size;
3253
3254         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3255 }
3256
3257 static void
3258 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3259 {
3260         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3261                 return;
3262
3263         vaddr += engine->context_size;
3264
3265         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3266                 drm_err_once(&engine->i915->drm,
3267                              "%s context redzone overwritten!\n",
3268                              engine->name);
3269 }
3270
3271 static void execlists_context_unpin(struct intel_context *ce)
3272 {
3273         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3274                       ce->engine);
3275
3276         i915_gem_object_unpin_map(ce->state->obj);
3277 }
3278
3279 static u32 *
3280 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3281 {
3282         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3283                 MI_SRM_LRM_GLOBAL_GTT |
3284                 MI_LRI_LRM_CS_MMIO;
3285         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3286         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3287                 CTX_TIMESTAMP * sizeof(u32);
3288         *cs++ = 0;
3289
3290         *cs++ = MI_LOAD_REGISTER_REG |
3291                 MI_LRR_SOURCE_CS_MMIO |
3292                 MI_LRI_LRM_CS_MMIO;
3293         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3294         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3295
3296         *cs++ = MI_LOAD_REGISTER_REG |
3297                 MI_LRR_SOURCE_CS_MMIO |
3298                 MI_LRI_LRM_CS_MMIO;
3299         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3300         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3301
3302         return cs;
3303 }
3304
3305 static u32 *
3306 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3307 {
3308         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3309
3310         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3311                 MI_SRM_LRM_GLOBAL_GTT |
3312                 MI_LRI_LRM_CS_MMIO;
3313         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3314         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3315                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3316         *cs++ = 0;
3317
3318         return cs;
3319 }
3320
3321 static u32 *
3322 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3323 {
3324         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3325
3326         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3327                 MI_SRM_LRM_GLOBAL_GTT |
3328                 MI_LRI_LRM_CS_MMIO;
3329         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3330         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3331                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3332         *cs++ = 0;
3333
3334         *cs++ = MI_LOAD_REGISTER_REG |
3335                 MI_LRR_SOURCE_CS_MMIO |
3336                 MI_LRI_LRM_CS_MMIO;
3337         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3338         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3339
3340         return cs;
3341 }
3342
3343 static u32 *
3344 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3345 {
3346         cs = gen12_emit_timestamp_wa(ce, cs);
3347         cs = gen12_emit_cmd_buf_wa(ce, cs);
3348         cs = gen12_emit_restore_scratch(ce, cs);
3349
3350         return cs;
3351 }
3352
3353 static u32 *
3354 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3355 {
3356         cs = gen12_emit_timestamp_wa(ce, cs);
3357         cs = gen12_emit_restore_scratch(ce, cs);
3358
3359         return cs;
3360 }
3361
3362 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3363 {
3364         return PAGE_SIZE * ce->wa_bb_page;
3365 }
3366
3367 static u32 *context_indirect_bb(const struct intel_context *ce)
3368 {
3369         void *ptr;
3370
3371         GEM_BUG_ON(!ce->wa_bb_page);
3372
3373         ptr = ce->lrc_reg_state;
3374         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3375         ptr += context_wa_bb_offset(ce);
3376
3377         return ptr;
3378 }
3379
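/*
 * Emit the per-context workaround commands into the context's wa_bb page,
 * pad to a cacheline with NOOPs, and point the INDIRECT_CTX registers in
 * the context image at it so the CS runs it as part of context restore.
 */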
3380 static void
3381 setup_indirect_ctx_bb(const struct intel_context *ce,
3382                       const struct intel_engine_cs *engine,
3383                       u32 *(*emit)(const struct intel_context *, u32 *))
3384 {
3385         u32 * const start = context_indirect_bb(ce);
3386         u32 *cs;
3387
3388         cs = emit(ce, start);
3389         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3390         while ((unsigned long)cs % CACHELINE_BYTES)
3391                 *cs++ = MI_NOOP;
3392
3393         lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3394                                     i915_ggtt_offset(ce->state) +
3395                                     context_wa_bb_offset(ce),
3396                                     (cs - start) * sizeof(*cs));
3397 }
3398
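/*
 * Refresh the ring registers (start, head, tail, ctl) in the context image,
 * plus the power-clock state for the render class, so that the next context
 * restore resumes from the given ring position.
 */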
3399 static void
3400 __execlists_update_reg_state(const struct intel_context *ce,
3401                              const struct intel_engine_cs *engine,
3402                              u32 head)
3403 {
3404         struct intel_ring *ring = ce->ring;
3405         u32 *regs = ce->lrc_reg_state;
3406
3407         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3408         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3409
3410         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3411         regs[CTX_RING_HEAD] = head;
3412         regs[CTX_RING_TAIL] = ring->tail;
3413         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3414
3415         /* RPCS */
3416         if (engine->class == RENDER_CLASS) {
3417                 regs[CTX_R_PWR_CLK_STATE] =
3418                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
3419
3420                 i915_oa_init_reg_state(ce, engine);
3421         }
3422
3423         if (ce->wa_bb_page) {
3424                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3425
3426                 fn = gen12_emit_indirect_ctx_xcs;
3427                 if (ce->engine->class == RENDER_CLASS)
3428                         fn = gen12_emit_indirect_ctx_rcs;
3429
3430                 /* Mutually exclusive wrt to global indirect bb */
3431                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3432                 setup_indirect_ctx_bb(ce, engine, fn);
3433         }
3434 }
3435
3436 static int
3437 __execlists_context_pin(struct intel_context *ce,
3438                         struct intel_engine_cs *engine)
3439 {
3440         void *vaddr;
3441
3442         GEM_BUG_ON(!ce->state);
3443         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3444
3445         vaddr = i915_gem_object_pin_map(ce->state->obj,
3446                                         i915_coherent_map_type(engine->i915) |
3447                                         I915_MAP_OVERRIDE);
3448         if (IS_ERR(vaddr))
3449                 return PTR_ERR(vaddr);
3450
3451         ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3452         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3453         __execlists_update_reg_state(ce, engine, ce->ring->tail);
3454
3455         return 0;
3456 }
3457
3458 static int execlists_context_pin(struct intel_context *ce)
3459 {
3460         return __execlists_context_pin(ce, ce->engine);
3461 }
3462
3463 static int execlists_context_alloc(struct intel_context *ce)
3464 {
3465         return __execlists_context_alloc(ce, ce->engine);
3466 }
3467
3468 static void execlists_context_reset(struct intel_context *ce)
3469 {
3470         CE_TRACE(ce, "reset\n");
3471         GEM_BUG_ON(!intel_context_is_pinned(ce));
3472
3473         intel_ring_reset(ce->ring, ce->ring->emit);
3474
3475         /* Scrub away the garbage */
3476         execlists_init_reg_state(ce->lrc_reg_state,
3477                                  ce, ce->engine, ce->ring, true);
3478         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3479
3480         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3481 }
3482
3483 static const struct intel_context_ops execlists_context_ops = {
3484         .alloc = execlists_context_alloc,
3485
3486         .pin = execlists_context_pin,
3487         .unpin = execlists_context_unpin,
3488
3489         .enter = intel_context_enter_engine,
3490         .exit = intel_context_exit_engine,
3491
3492         .reset = execlists_context_reset,
3493         .destroy = execlists_context_destroy,
3494 };
3495
3496 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3497 {
3498         u32 *cs;
3499
3500         GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3501         if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3502                 return 0;
3503
3504         cs = intel_ring_begin(rq, 6);
3505         if (IS_ERR(cs))
3506                 return PTR_ERR(cs);
3507
3508         /*
3509          * Check if we have been preempted before we even get started.
3510          *
3511          * After this point i915_request_started() reports true, even if
3512          * we get preempted and so are no longer running.
3513          */
3514         *cs++ = MI_ARB_CHECK;
3515         *cs++ = MI_NOOP;
3516
3517         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3518         *cs++ = i915_request_timeline(rq)->hwsp_offset;
3519         *cs++ = 0;
3520         *cs++ = rq->fence.seqno - 1;
3521
3522         intel_ring_advance(rq, cs);
3523
3524         /* Record the updated position of the request's payload */
3525         rq->infix = intel_ring_offset(rq, cs);
3526
3527         __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3528
3529         return 0;
3530 }
3531
3532 static int emit_pdps(struct i915_request *rq)
3533 {
3534         const struct intel_engine_cs * const engine = rq->engine;
3535         struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3536         int err, i;
3537         u32 *cs;
3538
3539         GEM_BUG_ON(intel_vgpu_active(rq->i915));
3540
3541         /*
3542          * Beware ye of the dragons, this sequence is magic!
3543          *
3544          * Small changes to this sequence can cause anything from
3545          * GPU hangs to forcewake errors and machine lockups!
3546          */
3547
3548         /* Flush any residual operations from the context load */
3549         err = engine->emit_flush(rq, EMIT_FLUSH);
3550         if (err)
3551                 return err;
3552
3553         /* Magic required to prevent forcewake errors! */
3554         err = engine->emit_flush(rq, EMIT_INVALIDATE);
3555         if (err)
3556                 return err;
3557
3558         cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3559         if (IS_ERR(cs))
3560                 return PTR_ERR(cs);
3561
3562         /* Ensure the LRI have landed before we invalidate & continue */
3563         *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3564         for (i = GEN8_3LVL_PDPES; i--; ) {
3565                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3566                 u32 base = engine->mmio_base;
3567
3568                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3569                 *cs++ = upper_32_bits(pd_daddr);
3570                 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3571                 *cs++ = lower_32_bits(pd_daddr);
3572         }
3573         *cs++ = MI_NOOP;
3574
3575         intel_ring_advance(rq, cs);
3576
3577         return 0;
3578 }
3579
3580 static int execlists_request_alloc(struct i915_request *request)
3581 {
3582         int ret;
3583
3584         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3585
3586         /*
3587          * Flush enough space to reduce the likelihood of waiting after
3588          * we start building the request - in which case we will just
3589          * have to repeat work.
3590          */
3591         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3592
3593         /*
3594          * Note that after this point, we have committed to using
3595          * this request as it is being used to both track the
3596          * state of engine initialisation and liveness of the
3597          * golden renderstate above. Think twice before you try
3598          * to cancel/unwind this request now.
3599          */
3600
3601         if (!i915_vm_is_4lvl(request->context->vm)) {
3602                 ret = emit_pdps(request);
3603                 if (ret)
3604                         return ret;
3605         }
3606
3607         /* Unconditionally invalidate GPU caches and TLBs. */
3608         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3609         if (ret)
3610                 return ret;
3611
3612         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3613         return 0;
3614 }
3615
3616 /*
3617  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3618  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3619  * but there is a slight complication as this is applied in WA batch where the
3620  * values are only initialized once so we cannot take register value at the
3621  * beginning and reuse it further; hence we save its value to memory, upload a
3622  * constant value with bit21 set and then we restore it back with the saved value.
3623  * To simplify the WA, a constant value is formed by using the default value
3624  * of this register. This shouldn't be a problem because we are only modifying
3625  * it for a short period and this batch is non-preemptible. We can of course
3626  * use additional instructions that read the actual value of the register
3627  * at that time and set our bit of interest but it makes the WA complicated.
3628  *
3629  * This WA is also required for Gen9 so extracting as a function avoids
3630  * code duplication.
3631  */
3632 static u32 *
3633 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3634 {
3635         /* NB no one else is allowed to scribble over scratch + 256! */
3636         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3637         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3638         *batch++ = intel_gt_scratch_offset(engine->gt,
3639                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3640         *batch++ = 0;
3641
3642         *batch++ = MI_LOAD_REGISTER_IMM(1);
3643         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3644         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3645
3646         batch = gen8_emit_pipe_control(batch,
3647                                        PIPE_CONTROL_CS_STALL |
3648                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3649                                        0);
3650
3651         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3652         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3653         *batch++ = intel_gt_scratch_offset(engine->gt,
3654                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3655         *batch++ = 0;
3656
3657         return batch;
3658 }
3659
3660 /*
3661  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3662  * initialized at the beginning and shared across all contexts but this field
3663  * helps us to have multiple batches at different offsets and select them based
3664  * on some criteria. At the moment this batch always starts at the beginning of the page
3665  * and at this point we don't have multiple wa_ctx batch buffers.
3666  *
3667  * The number of WAs applied is not known at the beginning; we use this field
3668  * to return the number of DWORDs written.
3669  *
3670  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3671  * so it adds NOOPs as padding to make it cacheline aligned.
3672  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them together
3673  * make a complete batch buffer.
3674  */
3675 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3676 {
3677         /* WaDisableCtxRestoreArbitration:bdw,chv */
3678         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3679
3680         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3681         if (IS_BROADWELL(engine->i915))
3682                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3683
3684         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3685         /* Actual scratch location is at 128 bytes offset */
3686         batch = gen8_emit_pipe_control(batch,
3687                                        PIPE_CONTROL_FLUSH_L3 |
3688                                        PIPE_CONTROL_STORE_DATA_INDEX |
3689                                        PIPE_CONTROL_CS_STALL |
3690                                        PIPE_CONTROL_QW_WRITE,
3691                                        LRC_PPHWSP_SCRATCH_ADDR);
3692
3693         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3694
3695         /* Pad to end of cacheline */
3696         while ((unsigned long)batch % CACHELINE_BYTES)
3697                 *batch++ = MI_NOOP;
3698
3699         /*
3700          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3701          * execution depends on the length specified in terms of cache lines
3702          * in the register CTX_RCS_INDIRECT_CTX
3703          */
3704
3705         return batch;
3706 }
3707
3708 struct lri {
3709         i915_reg_t reg;
3710         u32 value;
3711 };
3712
3713 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3714 {
3715         GEM_BUG_ON(!count || count > 63);
3716
3717         *batch++ = MI_LOAD_REGISTER_IMM(count);
3718         do {
3719                 *batch++ = i915_mmio_reg_offset(lri->reg);
3720                 *batch++ = lri->value;
3721         } while (lri++, --count);
3722         *batch++ = MI_NOOP;
3723
3724         return batch;
3725 }
3726
3727 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3728 {
3729         static const struct lri lri[] = {
3730                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3731                 {
3732                         COMMON_SLICE_CHICKEN2,
3733                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3734                                        0),
3735                 },
3736
3737                 /* BSpec: 11391 */
3738                 {
3739                         FF_SLICE_CHICKEN,
3740                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3741                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3742                 },
3743
3744                 /* BSpec: 11299 */
3745                 {
3746                         _3D_CHICKEN3,
3747                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3748                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3749                 }
3750         };
3751
3752         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3753
3754         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3755         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3756
3757         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3758         batch = gen8_emit_pipe_control(batch,
3759                                        PIPE_CONTROL_FLUSH_L3 |
3760                                        PIPE_CONTROL_STORE_DATA_INDEX |
3761                                        PIPE_CONTROL_CS_STALL |
3762                                        PIPE_CONTROL_QW_WRITE,
3763                                        LRC_PPHWSP_SCRATCH_ADDR);
3764
3765         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3766
3767         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3768         if (HAS_POOLED_EU(engine->i915)) {
3769                 /*
3770                  * EU pool configuration is setup along with golden context
3771                  * during context initialization. This value depends on
3772                  * device type (2x6 or 3x6) and needs to be updated based
3773                  * on which subslice is disabled especially for 2x6
3774                  * devices, however it is safe to load default
3775                  * configuration of 3x6 device instead of masking off
3776                  * corresponding bits because HW ignores bits of a disabled
3777                  * subslice and drops down to appropriate config. Please
3778                  * see render_state_setup() in i915_gem_render_state.c for
3779                  * possible configurations, to avoid duplication they are
3780                  * not shown here again.
3781                  */
3782                 *batch++ = GEN9_MEDIA_POOL_STATE;
3783                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3784                 *batch++ = 0x00777000;
3785                 *batch++ = 0;
3786                 *batch++ = 0;
3787                 *batch++ = 0;
3788         }
3789
3790         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3791
3792         /* Pad to end of cacheline */
3793         while ((unsigned long)batch % CACHELINE_BYTES)
3794                 *batch++ = MI_NOOP;
3795
3796         return batch;
3797 }
3798
3799 static u32 *
3800 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3801 {
3802         int i;
3803
3804         /*
3805          * WaPipeControlBefore3DStateSamplePattern: cnl
3806          *
3807          * Ensure the engine is idle prior to programming a
3808          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3809          */
3810         batch = gen8_emit_pipe_control(batch,
3811                                        PIPE_CONTROL_CS_STALL,
3812                                        0);
3813         /*
3814          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3815          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3816          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3817          * confusing. Since gen8_emit_pipe_control() already advances the
3818          * batch by 6 dwords, we advance the other 10 here, completing a
3819          * cacheline. It's not clear if the workaround requires this padding
3820          * before other commands, or if it's just the regular padding we would
3821          * already have for the workaround bb, so leave it here for now.
3822          */
3823         for (i = 0; i < 10; i++)
3824                 *batch++ = MI_NOOP;
3825
3826         /* Pad to end of cacheline */
3827         while ((unsigned long)batch % CACHELINE_BYTES)
3828                 *batch++ = MI_NOOP;
3829
3830         return batch;
3831 }
3832
3833 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3834
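/*
 * Allocate a page for the per-engine workaround batch buffers and pin it
 * high in the GGTT so the CS can execute it around context switches.
 */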
3835 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3836 {
3837         struct drm_i915_gem_object *obj;
3838         struct i915_vma *vma;
3839         int err;
3840
3841         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3842         if (IS_ERR(obj))
3843                 return PTR_ERR(obj);
3844
3845         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3846         if (IS_ERR(vma)) {
3847                 err = PTR_ERR(vma);
3848                 goto err;
3849         }
3850
3851         err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3852         if (err)
3853                 goto err;
3854
3855         engine->wa_ctx.vma = vma;
3856         return 0;
3857
3858 err:
3859         i915_gem_object_put(obj);
3860         return err;
3861 }
3862
3863 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3864 {
3865         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3866 }
3867
3868 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3869
3870 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3871 {
3872         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3873         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3874                                             &wa_ctx->per_ctx };
3875         wa_bb_func_t wa_bb_fn[2];
3876         struct page *page;
3877         void *batch, *batch_ptr;
3878         unsigned int i;
3879         int ret;
3880
3881         if (engine->class != RENDER_CLASS)
3882                 return 0;
3883
3884         switch (INTEL_GEN(engine->i915)) {
3885         case 12:
3886         case 11:
3887                 return 0;
3888         case 10:
3889                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3890                 wa_bb_fn[1] = NULL;
3891                 break;
3892         case 9:
3893                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3894                 wa_bb_fn[1] = NULL;
3895                 break;
3896         case 8:
3897                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3898                 wa_bb_fn[1] = NULL;
3899                 break;
3900         default:
3901                 MISSING_CASE(INTEL_GEN(engine->i915));
3902                 return 0;
3903         }
3904
3905         ret = lrc_setup_wa_ctx(engine);
3906         if (ret) {
3907                 drm_dbg(&engine->i915->drm,
3908                         "Failed to setup context WA page: %d\n", ret);
3909                 return ret;
3910         }
3911
3912         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3913         batch = batch_ptr = kmap_atomic(page);
3914
3915         /*
3916          * Emit the two workaround batch buffers, recording the offset from the
3917          * start of the workaround batch buffer object for each and their
3918          * respective sizes.
3919          */
3920         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3921                 wa_bb[i]->offset = batch_ptr - batch;
3922                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3923                                                   CACHELINE_BYTES))) {
3924                         ret = -EINVAL;
3925                         break;
3926                 }
3927                 if (wa_bb_fn[i])
3928                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3929                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3930         }
3931
3932         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3933
3934         kunmap_atomic(batch);
3935         if (ret)
3936                 lrc_destroy_wa_ctx(engine);
3937
3938         return ret;
3939 }
3940
3941 static void reset_csb_pointers(struct intel_engine_cs *engine)
3942 {
3943         struct intel_engine_execlists * const execlists = &engine->execlists;
3944         const unsigned int reset_value = execlists->csb_size - 1;
3945
3946         ring_set_paused(engine, 0);
3947
3948         /*
3949          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3950          * Bludgeon them with a mmio update to be sure.
3951          */
3952         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3953                      0xffff << 16 | reset_value << 8 | reset_value);
3954         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3955
3956         /*
3957          * After a reset, the HW starts writing into CSB entry [0]. We
3958          * therefore have to set our HEAD pointer back one entry so that
3959          * the *first* entry we check is entry 0. To complicate this further,
3960          * as we don't wait for the first interrupt after reset, we have to
3961          * fake the HW write to point back to the last entry so that our
3962          * inline comparison of our cached head position against the last HW
3963          * write works even before the first interrupt.
3964          */
3965         execlists->csb_head = reset_value;
3966         WRITE_ONCE(*execlists->csb_write, reset_value);
3967         wmb(); /* Make sure this is visible to HW (paranoia?) */
3968
3969         invalidate_csb_entries(&execlists->csb_status[0],
3970                                &execlists->csb_status[reset_value]);
3971
3972         /* Once more for luck and our trusty paranoia */
3973         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3974                      0xffff << 16 | reset_value << 8 | reset_value);
3975         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3976
3977         GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
3978 }
3979
3980 static void execlists_sanitize(struct intel_engine_cs *engine)
3981 {
3982         /*
3983          * Poison residual state on resume, in case the suspend didn't!
3984          *
3985          * We have to assume that across suspend/resume (or other loss
3986          * of control) that the contents of our pinned buffers has been
3987          * lost, replaced by garbage. Since this doesn't always happen,
3988          * let's poison such state so that we more quickly spot when
3989          * we falsely assume it has been preserved.
3990          */
3991         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3992                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3993
3994         reset_csb_pointers(engine);
3995
3996         /*
3997          * The kernel_context HWSP is stored in the status_page. As above,
3998          * that may be lost on resume/initialisation, and so we need to
3999          * reset the value in the HWSP.
4000          */
4001         intel_timeline_reset_seqno(engine->kernel_context->timeline);
4002
4003         /* And scrub the dirty cachelines for the HWSP */
4004         clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4005 }
4006
4007 static void enable_error_interrupt(struct intel_engine_cs *engine)
4008 {
4009         u32 status;
4010
4011         engine->execlists.error_interrupt = 0;
4012         ENGINE_WRITE(engine, RING_EMR, ~0u);
4013         ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4014
4015         status = ENGINE_READ(engine, RING_ESR);
4016         if (unlikely(status)) {
4017                 drm_err(&engine->i915->drm,
4018                         "engine '%s' resumed still in error: %08x\n",
4019                         engine->name, status);
4020                 __intel_gt_reset(engine->gt, engine->mask);
4021         }
4022
4023         /*
4024          * On current gen8+, we have 2 signals to play with
4025          *
4026          * - I915_ERROR_INSTRUCTION (bit 0)
4027          *
4028          *    Generate an error if the command parser encounters an invalid
4029          *    instruction
4030          *
4031          *    This is a fatal error.
4032          *
4033          * - CP_PRIV (bit 2)
4034          *
4035          *    Generate an error on privilege violation (where the CP replaces
4036          *    the instruction with a no-op). This also fires for writes into
4037          *    read-only scratch pages.
4038          *
4039          *    This is a non-fatal error, parsing continues.
4040          *
4041          * * there are a few others defined for odd HW that we do not use
4042          *
4043          * Since CP_PRIV fires for cases where we have chosen to ignore the
4044          * error (as the HW is validating and suppressing the mistakes), we
4045          * only unmask the instruction error bit.
4046          */
4047         ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4048 }
4049
4050 static void enable_execlists(struct intel_engine_cs *engine)
4051 {
4052         u32 mode;
4053
4054         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4055
4056         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4057
4058         if (INTEL_GEN(engine->i915) >= 11)
4059                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4060         else
4061                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4062         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4063
4064         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4065
4066         ENGINE_WRITE_FW(engine,
4067                         RING_HWS_PGA,
4068                         i915_ggtt_offset(engine->status_page.vma));
4069         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4070
4071         enable_error_interrupt(engine);
4072
4073         engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4074 }
4075
4076 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4077 {
4078         bool unexpected = false;
4079
4080         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4081                 drm_dbg(&engine->i915->drm,
4082                         "STOP_RING still set in RING_MI_MODE\n");
4083                 unexpected = true;
4084         }
4085
4086         return unexpected;
4087 }
4088
4089 static int execlists_resume(struct intel_engine_cs *engine)
4090 {
4091         intel_mocs_init_engine(engine);
4092
4093         intel_engine_reset_breadcrumbs(engine);
4094
4095         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4096                 struct drm_printer p = drm_debug_printer(__func__);
4097
4098                 intel_engine_dump(engine, &p, NULL);
4099         }
4100
4101         enable_execlists(engine);
4102
4103         return 0;
4104 }
4105
4106 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4107 {
4108         struct intel_engine_execlists * const execlists = &engine->execlists;
4109         unsigned long flags;
4110
4111         ENGINE_TRACE(engine, "depth<-%d\n",
4112                      atomic_read(&execlists->tasklet.count));
4113
4114         /*
4115          * Prevent request submission to the hardware until we have
4116          * completed the reset in i915_gem_reset_finish(). If a request
4117          * is completed by one engine, it may then queue a request
4118          * to a second via its execlists->tasklet *just* as we are
4119          * calling engine->resume() and also writing the ELSP.
4120          * Turning off the execlists->tasklet until the reset is over
4121          * prevents the race.
4122          */
4123         __tasklet_disable_sync_once(&execlists->tasklet);
4124         GEM_BUG_ON(!reset_in_progress(execlists));
4125
4126         /* And flush any current direct submission. */
4127         spin_lock_irqsave(&engine->active.lock, flags);
4128         spin_unlock_irqrestore(&engine->active.lock, flags);
4129
4130         /*
4131          * We stop engines, otherwise we might get a failed reset and a
4132          * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
4133          * from a system hang if a batchbuffer is progressing when
4134          * the reset is issued, regardless of the READY_TO_RESET ack.
4135          * Thus assume it is best to stop engines on all gens
4136          * where we have a gpu reset.
4137          *
4138          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4139          *
4140          * FIXME: Wa for more modern gens needs to be validated
4141          */
4142         ring_set_paused(engine, 1);
4143         intel_engine_stop_cs(engine);
4144
4145         engine->execlists.reset_ccid = active_ccid(engine);
4146 }
4147
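/*
 * RING_MI_MODE is a masked register: set the write-enable bit in the upper
 * half and clear STOP_RING in the lower half of the saved context image so
 * the ring does not remain stopped once the context is restored after reset.
 */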
4148 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4149 {
4150         int x;
4151
4152         x = lrc_ring_mi_mode(engine);
4153         if (x != -1) {
4154                 regs[x + 1] &= ~STOP_RING;
4155                 regs[x + 1] |= STOP_RING << 16;
4156         }
4157 }
4158
4159 static void __execlists_reset_reg_state(const struct intel_context *ce,
4160                                         const struct intel_engine_cs *engine)
4161 {
4162         u32 *regs = ce->lrc_reg_state;
4163
4164         __reset_stop_ring(regs, engine);
4165 }
4166
4167 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4168 {
4169         struct intel_engine_execlists * const execlists = &engine->execlists;
4170         struct intel_context *ce;
4171         struct i915_request *rq;
4172         u32 head;
4173
4174         mb(); /* paranoia: read the CSB pointers from after the reset */
4175         clflush(execlists->csb_write);
4176         mb();
4177
4178         process_csb(engine); /* drain preemption events */
4179
4180         /* Following the reset, we need to reload the CSB read/write pointers */
4181         reset_csb_pointers(engine);
4182
4183         /*
4184          * Save the currently executing context, even if we completed
4185          * its request, it was still running at the time of the
4186          * reset and will have been clobbered.
4187          */
4188         rq = active_context(engine, engine->execlists.reset_ccid);
4189         if (!rq)
4190                 goto unwind;
4191
4192         ce = rq->context;
4193         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4194
4195         if (i915_request_completed(rq)) {
4196                 /* Idle context; tidy up the ring so we can restart afresh */
4197                 head = intel_ring_wrap(ce->ring, rq->tail);
4198                 goto out_replay;
4199         }
4200
4201         /* We still have requests in-flight; the engine should be active */
4202         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4203
4204         /* Context has requests still in-flight; it should not be idle! */
4205         GEM_BUG_ON(i915_active_is_idle(&ce->active));
4206
4207         rq = active_request(ce->timeline, rq);
4208         head = intel_ring_wrap(ce->ring, rq->head);
4209         GEM_BUG_ON(head == ce->ring->tail);
4210
4211         /*
4212          * If this request hasn't started yet, e.g. it is waiting on a
4213          * semaphore, we need to avoid skipping the request or else we
4214          * break the signaling chain. However, if the context is corrupt
4215          * the request will not restart and we will be stuck with a wedged
4216          * device. It is quite often the case that if we issue a reset
4217          * while the GPU is loading the context image, that the context
4218          * image becomes corrupt.
4219          *
4220          * Otherwise, if we have not started yet, the request should replay
4221          * perfectly and we do not need to flag the result as being erroneous.
4222          */
4223         if (!i915_request_started(rq))
4224                 goto out_replay;
4225
4226         /*
4227          * If the request was innocent, we leave the request in the ELSP
4228          * and will try to replay it on restarting. The context image may
4229          * have been corrupted by the reset, in which case we may have
4230          * to service a new GPU hang, but more likely we can continue on
4231          * without impact.
4232          *
4233          * If the request was guilty, we presume the context is corrupt
4234          * and have to at least restore the RING register in the context
4235          * image back to the expected values to skip over the guilty request.
4236          */
4237         __i915_request_reset(rq, stalled);
4238
4239         /*
4240          * We want a simple context + ring to execute the breadcrumb update.
4241          * We cannot rely on the context being intact across the GPU hang,
4242          * so clear it and rebuild just what we need for the breadcrumb.
4243          * All pending requests for this context will be zapped, and any
4244          * future request will be after userspace has had the opportunity
4245          * to recreate its own state.
4246          */
4247 out_replay:
4248         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4249                      head, ce->ring->tail);
4250         __execlists_reset_reg_state(ce, engine);
4251         __execlists_update_reg_state(ce, engine, head);
4252         ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4253
4254 unwind:
4255         /* Push back any incomplete requests for replay after the reset. */
4256         cancel_port_requests(execlists);
4257         __unwind_incomplete_requests(engine);
4258 }
4259
4260 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4261 {
4262         unsigned long flags;
4263
4264         ENGINE_TRACE(engine, "\n");
4265
4266         spin_lock_irqsave(&engine->active.lock, flags);
4267
4268         __execlists_reset(engine, stalled);
4269
4270         spin_unlock_irqrestore(&engine->active.lock, flags);
4271 }
4272
4273 static void nop_submission_tasklet(unsigned long data)
4274 {
4275         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4276
4277         /* The driver is wedged; don't process any more events. */
4278         WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4279 }
4280
4281 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4282 {
4283         struct intel_engine_execlists * const execlists = &engine->execlists;
4284         struct i915_request *rq, *rn;
4285         struct rb_node *rb;
4286         unsigned long flags;
4287
4288         ENGINE_TRACE(engine, "\n");
4289
4290         /*
4291          * Before we call engine->cancel_requests(), we should have exclusive
4292          * access to the submission state. This is arranged for us by the
4293          * caller disabling the interrupt generation, the tasklet and other
4294          * threads that may then access the same state, giving us a free hand
4295          * to reset state. However, we still need to let lockdep be aware that
4296          * we know this state may be accessed in hardirq context, so we
4297          * disable the irq around this manipulation and we want to keep
4298          * the spinlock focused on its duties and not accidentally conflate
4299          * coverage to the submission's irq state. (Similarly, although we
4300          * shouldn't need to disable irq around the manipulation of the
4301          * submission's irq state, we also wish to remind ourselves that
4302          * it is irq state.)
4303          */
4304         spin_lock_irqsave(&engine->active.lock, flags);
4305
4306         __execlists_reset(engine, true);
4307
4308         /* Mark all executing requests as skipped. */
4309         list_for_each_entry(rq, &engine->active.requests, sched.link)
4310                 mark_eio(rq);
4311
4312         /* Flush the queued requests to the timeline list (for retiring). */
4313         while ((rb = rb_first_cached(&execlists->queue))) {
4314                 struct i915_priolist *p = to_priolist(rb);
4315                 int i;
4316
4317                 priolist_for_each_request_consume(rq, rn, p, i) {
4318                         mark_eio(rq);
4319                         __i915_request_submit(rq);
4320                 }
4321
4322                 rb_erase_cached(&p->node, &execlists->queue);
4323                 i915_priolist_free(p);
4324         }
4325
4326         /* On-hold requests will be flushed to timeline upon their release */
4327         list_for_each_entry(rq, &engine->active.hold, sched.link)
4328                 mark_eio(rq);
4329
4330         /* Cancel all attached virtual engines */
4331         while ((rb = rb_first_cached(&execlists->virtual))) {
4332                 struct virtual_engine *ve =
4333                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4334
4335                 rb_erase_cached(rb, &execlists->virtual);
4336                 RB_CLEAR_NODE(rb);
4337
4338                 spin_lock(&ve->base.active.lock);
4339                 rq = fetch_and_zero(&ve->request);
4340                 if (rq) {
4341                         mark_eio(rq);
4342
4343                         rq->engine = engine;
4344                         __i915_request_submit(rq);
4345                         i915_request_put(rq);
4346
4347                         ve->base.execlists.queue_priority_hint = INT_MIN;
4348                 }
4349                 spin_unlock(&ve->base.active.lock);
4350         }
4351
4352         /* Remaining _unready_ requests will be nop'ed when submitted */
4353
4354         execlists->queue_priority_hint = INT_MIN;
4355         execlists->queue = RB_ROOT_CACHED;
4356
4357         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4358         execlists->tasklet.func = nop_submission_tasklet;
4359
4360         spin_unlock_irqrestore(&engine->active.lock, flags);
4361 }
4362
4363 static void execlists_reset_finish(struct intel_engine_cs *engine)
4364 {
4365         struct intel_engine_execlists * const execlists = &engine->execlists;
4366
4367         /*
4368          * After a GPU reset, we may have requests to replay. Do so now while
4369          * we still have the forcewake to be sure that the GPU is not allowed
4370          * to sleep before we restart and reload a context.
4371          */
4372         GEM_BUG_ON(!reset_in_progress(execlists));
4373         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4374                 execlists->tasklet.func(execlists->tasklet.data);
4375
4376         if (__tasklet_enable(&execlists->tasklet))
4377                 /* And kick in case we missed a new request submission. */
4378                 tasklet_hi_schedule(&execlists->tasklet);
4379         ENGINE_TRACE(engine, "depth->%d\n",
4380                      atomic_read(&execlists->tasklet.count));
4381 }
4382
4383 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4384                                     u64 offset, u32 len,
4385                                     const unsigned int flags)
4386 {
4387         u32 *cs;
4388
4389         cs = intel_ring_begin(rq, 4);
4390         if (IS_ERR(cs))
4391                 return PTR_ERR(cs);
4392
4393         /*
4394          * WaDisableCtxRestoreArbitration:bdw,chv
4395          *
4396          * We don't need to perform MI_ARB_ENABLE as often as we do (in
4397          * particular, on all the gens that do not need the w/a at all!); if we
4398          * took care to make sure that arbitration was enabled on every
4399          * switch into this context (both ordinary and for preemption),
4400          * we would be fine.  However, for gen8 there is another w/a that
4401          * requires us to not preempt inside GPGPU execution, so we keep
4402          * arbitration disabled for gen8 batches. Arbitration will be
4403          * re-enabled before we close the request
4404          * (engine->emit_fini_breadcrumb).
4405          */
4406         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4407
4408         /* FIXME(BDW+): Address space and security selectors. */
4409         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4410                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4411         *cs++ = lower_32_bits(offset);
4412         *cs++ = upper_32_bits(offset);
4413
4414         intel_ring_advance(rq, cs);
4415
4416         return 0;
4417 }
4418
4419 static int gen8_emit_bb_start(struct i915_request *rq,
4420                               u64 offset, u32 len,
4421                               const unsigned int flags)
4422 {
4423         u32 *cs;
4424
4425         cs = intel_ring_begin(rq, 6);
4426         if (IS_ERR(cs))
4427                 return PTR_ERR(cs);
4428
4429         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4430
4431         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4432                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4433         *cs++ = lower_32_bits(offset);
4434         *cs++ = upper_32_bits(offset);
4435
4436         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4437         *cs++ = MI_NOOP;
4438
4439         intel_ring_advance(rq, cs);
4440
4441         return 0;
4442 }
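
/*
 * The two BB_START emitters above differ only in their MI_ARB handling:
 * gen8_emit_bb_start() re-enables arbitration around the batch so that it
 * can be preempted and disables it again afterwards, whereas the _noarb()
 * variant keeps arbitration off for the whole batch (it is selected when
 * the engine lacks preemption, see intel_execlists_set_default_submission()).
 * In both, BIT(8) of MI_BATCH_BUFFER_START_GEN8 is set for non-secure
 * batches, selecting the PPGTT rather than the GGTT address space (the
 * "address space selector" the FIXME above refers to).
 */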
4443
4444 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4445 {
4446         ENGINE_WRITE(engine, RING_IMR,
4447                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
4448         ENGINE_POSTING_READ(engine, RING_IMR);
4449 }
4450
4451 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4452 {
4453         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4454 }
4455
4456 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4457 {
4458         u32 cmd, *cs;
4459
4460         cs = intel_ring_begin(request, 4);
4461         if (IS_ERR(cs))
4462                 return PTR_ERR(cs);
4463
4464         cmd = MI_FLUSH_DW + 1;
4465
4466         /* We always require a command barrier so that subsequent
4467          * commands, such as breadcrumb interrupts, are strictly ordered
4468          * wrt the contents of the write cache being flushed to memory
4469          * (and thus being coherent from the CPU).
4470          */
4471         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4472
4473         if (mode & EMIT_INVALIDATE) {
4474                 cmd |= MI_INVALIDATE_TLB;
4475                 if (request->engine->class == VIDEO_DECODE_CLASS)
4476                         cmd |= MI_INVALIDATE_BSD;
4477         }
4478
4479         *cs++ = cmd;
4480         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4481         *cs++ = 0; /* upper addr */
4482         *cs++ = 0; /* value */
4483         intel_ring_advance(request, cs);
4484
4485         return 0;
4486 }
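
/*
 * The flush above is a plain 4-dword MI_FLUSH_DW:
 *
 *   dw0: MI_FLUSH_DW | STORE_INDEX | OP_STOREDW (+ TLB/BSD invalidate)
 *   dw1: LRC_PPHWSP_SCRATCH_ADDR (post-sync write target)
 *   dw2: 0 (upper address)
 *   dw3: 0 (value)
 *
 * The dummy post-sync write into the ppHWSP scratch slot provides the
 * command barrier described in the comment, and the "+ 1" on the opcode
 * bumps the command length to account for the extra upper-address dword.
 */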
4487
4488 static int gen8_emit_flush_render(struct i915_request *request,
4489                                   u32 mode)
4490 {
4491         bool vf_flush_wa = false, dc_flush_wa = false;
4492         u32 *cs, flags = 0;
4493         int len;
4494
4495         flags |= PIPE_CONTROL_CS_STALL;
4496
4497         if (mode & EMIT_FLUSH) {
4498                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4499                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4500                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4501                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4502         }
4503
4504         if (mode & EMIT_INVALIDATE) {
4505                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4506                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4507                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4508                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4509                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4510                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4511                 flags |= PIPE_CONTROL_QW_WRITE;
4512                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4513
4514                 /*
4515                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4516                  * pipe control.
4517                  */
4518                 if (IS_GEN(request->i915, 9))
4519                         vf_flush_wa = true;
4520
4521                 /* WaForGAMHang:kbl */
4522                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
4523                         dc_flush_wa = true;
4524         }
4525
4526         len = 6;
4527
4528         if (vf_flush_wa)
4529                 len += 6;
4530
4531         if (dc_flush_wa)
4532                 len += 12;
4533
4534         cs = intel_ring_begin(request, len);
4535         if (IS_ERR(cs))
4536                 return PTR_ERR(cs);
4537
4538         if (vf_flush_wa)
4539                 cs = gen8_emit_pipe_control(cs, 0, 0);
4540
4541         if (dc_flush_wa)
4542                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4543                                             0);
4544
4545         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4546
4547         if (dc_flush_wa)
4548                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4549
4550         intel_ring_advance(request, cs);
4551
4552         return 0;
4553 }
4554
4555 static int gen11_emit_flush_render(struct i915_request *request,
4556                                    u32 mode)
4557 {
4558         if (mode & EMIT_FLUSH) {
4559                 u32 *cs;
4560                 u32 flags = 0;
4561
4562                 flags |= PIPE_CONTROL_CS_STALL;
4563
4564                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4565                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4566                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4567                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4568                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4569                 flags |= PIPE_CONTROL_QW_WRITE;
4570                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4571
4572                 cs = intel_ring_begin(request, 6);
4573                 if (IS_ERR(cs))
4574                         return PTR_ERR(cs);
4575
4576                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4577                 intel_ring_advance(request, cs);
4578         }
4579
4580         if (mode & EMIT_INVALIDATE) {
4581                 u32 *cs;
4582                 u32 flags = 0;
4583
4584                 flags |= PIPE_CONTROL_CS_STALL;
4585
4586                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4587                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4588                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4589                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4590                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4591                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4592                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4593                 flags |= PIPE_CONTROL_QW_WRITE;
4594                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4595
4596                 cs = intel_ring_begin(request, 6);
4597                 if (IS_ERR(cs))
4598                         return PTR_ERR(cs);
4599
4600                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4601                 intel_ring_advance(request, cs);
4602         }
4603
4604         return 0;
4605 }
4606
4607 static u32 preparser_disable(bool state)
4608 {
4609         return MI_ARB_CHECK | 1 << 8 | state;
4610 }
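
/*
 * preparser_disable() piggy-backs on MI_ARB_CHECK: on gen12 the command
 * carries a pre-fetch disable control, where bit 8 appears to act as the
 * write-enable mask for the disable flag in bit 0. The dword returned for
 * 'true' therefore switches the pre-parser off and the one for 'false'
 * switches it back on; see the invalidate path of gen12_emit_flush_render()
 * below for how the pair brackets the TLB invalidation.
 */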
4611
4612 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4613 {
4614         static const i915_reg_t vd[] = {
4615                 GEN12_VD0_AUX_NV,
4616                 GEN12_VD1_AUX_NV,
4617                 GEN12_VD2_AUX_NV,
4618                 GEN12_VD3_AUX_NV,
4619         };
4620
4621         static const i915_reg_t ve[] = {
4622                 GEN12_VE0_AUX_NV,
4623                 GEN12_VE1_AUX_NV,
4624         };
4625
4626         if (engine->class == VIDEO_DECODE_CLASS)
4627                 return vd[engine->instance];
4628
4629         if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4630                 return ve[engine->instance];
4631
4632         GEM_BUG_ON("unknown aux_inv_reg\n");
4633
4634         return INVALID_MMIO_REG;
4635 }
4636
4637 static u32 *
4638 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4639 {
4640         *cs++ = MI_LOAD_REGISTER_IMM(1);
4641         *cs++ = i915_mmio_reg_offset(inv_reg);
4642         *cs++ = AUX_INV;
4643         *cs++ = MI_NOOP;
4644
4645         return cs;
4646 }
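
/*
 * gen12_emit_aux_table_inv() is a single MI_LOAD_REGISTER_IMM writing
 * AUX_INV into the engine's *_AUX_NV register which, per the hsdes
 * reference at the call sites, requests invalidation of the auxiliary
 * table caches; the trailing MI_NOOP merely pads the sequence to four
 * dwords.
 */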
4647
4648 static int gen12_emit_flush_render(struct i915_request *request,
4649                                    u32 mode)
4650 {
4651         if (mode & EMIT_FLUSH) {
4652                 u32 flags = 0;
4653                 u32 *cs;
4654
4655                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4656                 flags |= PIPE_CONTROL_FLUSH_L3;
4657                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4658                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4659                 /* Wa_1409600907:tgl */
4660                 flags |= PIPE_CONTROL_DEPTH_STALL;
4661                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4662                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4663
4664                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4665                 flags |= PIPE_CONTROL_QW_WRITE;
4666
4667                 flags |= PIPE_CONTROL_CS_STALL;
4668
4669                 cs = intel_ring_begin(request, 6);
4670                 if (IS_ERR(cs))
4671                         return PTR_ERR(cs);
4672
4673                 cs = gen12_emit_pipe_control(cs,
4674                                              PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4675                                              flags, LRC_PPHWSP_SCRATCH_ADDR);
4676                 intel_ring_advance(request, cs);
4677         }
4678
4679         if (mode & EMIT_INVALIDATE) {
4680                 u32 flags = 0;
4681                 u32 *cs;
4682
4683                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4684                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4685                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4686                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4687                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4688                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4689                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4690
4691                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4692                 flags |= PIPE_CONTROL_QW_WRITE;
4693
4694                 flags |= PIPE_CONTROL_CS_STALL;
4695
4696                 cs = intel_ring_begin(request, 8 + 4);
4697                 if (IS_ERR(cs))
4698                         return PTR_ERR(cs);
4699
4700                 /*
4701                  * Prevent the pre-parser from skipping past the TLB
4702                  * invalidate and loading a stale page for the batch
4703                  * buffer / request payload.
4704                  */
4705                 *cs++ = preparser_disable(true);
4706
4707                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4708
4709                 /* hsdes: 1809175790 */
4710                 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4711
4712                 *cs++ = preparser_disable(false);
4713                 intel_ring_advance(request, cs);
4714         }
4715
4716         return 0;
4717 }
4718
4719 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4720 {
4721         intel_engine_mask_t aux_inv = 0;
4722         u32 cmd, *cs;
4723
4724         if (mode & EMIT_INVALIDATE)
4725                 aux_inv = request->engine->mask & ~BIT(BCS0);
4726
4727         cs = intel_ring_begin(request,
4728                               4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
4729         if (IS_ERR(cs))
4730                 return PTR_ERR(cs);
4731
4732         cmd = MI_FLUSH_DW + 1;
4733
4734         /* We always require a command barrier so that subsequent
4735          * commands, such as breadcrumb interrupts, are strictly ordered
4736          * wrt the contents of the write cache being flushed to memory
4737          * (and thus being coherent from the CPU).
4738          */
4739         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4740
4741         if (mode & EMIT_INVALIDATE) {
4742                 cmd |= MI_INVALIDATE_TLB;
4743                 if (request->engine->class == VIDEO_DECODE_CLASS)
4744                         cmd |= MI_INVALIDATE_BSD;
4745         }
4746
4747         *cs++ = cmd;
4748         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4749         *cs++ = 0; /* upper addr */
4750         *cs++ = 0; /* value */
4751
4752         if (aux_inv) { /* hsdes: 1809175790 */
4753                 struct intel_engine_cs *engine;
4754                 unsigned int tmp;
4755
4756                 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4757                 for_each_engine_masked(engine, request->engine->gt,
4758                                        aux_inv, tmp) {
4759                         *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4760                         *cs++ = AUX_INV;
4761                 }
4762                 *cs++ = MI_NOOP;
4763         }
4764         intel_ring_advance(request, cs);
4765
4766         return 0;
4767 }
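
/*
 * Regarding the intel_ring_begin() length above: the MI_FLUSH_DW itself
 * needs 4 dwords, and when aux_inv is non-zero we append an
 * MI_LOAD_REGISTER_IMM header, a (register, AUX_INV) pair per engine in
 * the mask and a closing MI_NOOP, hence the extra
 * 2 * hweight8(aux_inv) + 2 dwords.
 */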
4768
4769 static void assert_request_valid(struct i915_request *rq)
4770 {
4771         struct intel_ring *ring __maybe_unused = rq->ring;
4772
4773         /* Can we unwind this request without appearing to go forwards? */
4774         GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4775 }
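
/*
 * assert_request_valid() guards the unwind path: intel_ring_direction()
 * can only distinguish rewinding from advancing when the two offsets are
 * less than half the ring apart, so a request whose span from head to
 * wa_tail grew beyond half the ring could be mistaken for a forwards move
 * when it is unwound after preemption. Hence the "less than half the
 * ring" check noted in gen8_emit_wa_tail() below.
 */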
4776
4777 /*
4778  * Reserve space for 2 NOOPs at the end of each request to be
4779  * used as a workaround for not being allowed to do lite
4780  * restore with HEAD==TAIL (WaIdleLiteRestore).
4781  */
4782 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4783 {
4784         /* Ensure there's always at least one preemption point per-request. */
4785         *cs++ = MI_ARB_CHECK;
4786         *cs++ = MI_NOOP;
4787         request->wa_tail = intel_ring_offset(request, cs);
4788
4789         /* Check that entire request is less than half the ring */
4790         assert_request_valid(request);
4791
4792         return cs;
4793 }
4794
4795 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4796 {
4797         *cs++ = MI_SEMAPHORE_WAIT |
4798                 MI_SEMAPHORE_GLOBAL_GTT |
4799                 MI_SEMAPHORE_POLL |
4800                 MI_SEMAPHORE_SAD_EQ_SDD;
4801         *cs++ = 0;
4802         *cs++ = intel_hws_preempt_address(request->engine);
4803         *cs++ = 0;
4804
4805         return cs;
4806 }
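
/*
 * The busywait above makes the CS spin (MI_SEMAPHORE_WAIT in polling
 * mode, "semaphore address data equals semaphore data dword") until the
 * dword at intel_hws_preempt_address() reads back as 0. The driver sets
 * that semaphore non-zero via ring_set_paused() while it rearranges the
 * ELSP, so a request reaching its breadcrumb parks there instead of
 * racing ahead during preempt-to-busy.
 */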
4807
4808 static __always_inline u32 *
4809 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4810 {
4811         *cs++ = MI_USER_INTERRUPT;
4812
4813         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4814         if (intel_engine_has_semaphores(request->engine))
4815                 cs = emit_preempt_busywait(request, cs);
4816
4817         request->tail = intel_ring_offset(request, cs);
4818         assert_ring_tail_valid(request->ring, request->tail);
4819
4820         return gen8_emit_wa_tail(request, cs);
4821 }
4822
4823 static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4824 {
4825         u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4826
4827         return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4828 }
4829
4830 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4831 {
4832         return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4833 }
4834
4835 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4836 {
4837         cs = gen8_emit_pipe_control(cs,
4838                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4839                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4840                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4841                                     0);
4842
4843         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4844         cs = gen8_emit_ggtt_write_rcs(cs,
4845                                       request->fence.seqno,
4846                                       i915_request_active_timeline(request)->hwsp_offset,
4847                                       PIPE_CONTROL_FLUSH_ENABLE |
4848                                       PIPE_CONTROL_CS_STALL);
4849
4850         return gen8_emit_fini_breadcrumb_tail(request, cs);
4851 }
4852
4853 static u32 *
4854 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4855 {
4856         cs = gen8_emit_ggtt_write_rcs(cs,
4857                                       request->fence.seqno,
4858                                       i915_request_active_timeline(request)->hwsp_offset,
4859                                       PIPE_CONTROL_CS_STALL |
4860                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4861                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4862                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4863                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4864                                       PIPE_CONTROL_FLUSH_ENABLE);
4865
4866         return gen8_emit_fini_breadcrumb_tail(request, cs);
4867 }
4868
4869 /*
4870  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4871  * flush and will continue pre-fetching the instructions after it before the
4872  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4873  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4874  * of the next request before the memory has been flushed, we're guaranteed that
4875  * we won't access the batch itself too early.
4876  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4877  * so, if the current request is modifying an instruction in the next request on
4878  * the same intel_context, we might pre-fetch and then execute the pre-update
4879  * instruction. To avoid this, the users of self-modifying code should either
4880  * disable the parser around the code emitting the memory writes, via a new flag
4881  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4882  * the in-kernel use-cases we've opted to use a separate context, see
4883  * reloc_gpu() as an example.
4884  * All the above applies only to the instructions themselves. Non-inline data
4885  * used by the instructions is not pre-fetched.
4886  */
4887
4888 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4889 {
4890         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4891                 MI_SEMAPHORE_GLOBAL_GTT |
4892                 MI_SEMAPHORE_POLL |
4893                 MI_SEMAPHORE_SAD_EQ_SDD;
4894         *cs++ = 0;
4895         *cs++ = intel_hws_preempt_address(request->engine);
4896         *cs++ = 0;
4897         *cs++ = 0;
4898         *cs++ = MI_NOOP;
4899
4900         return cs;
4901 }
4902
4903 static __always_inline u32 *
4904 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4905 {
4906         *cs++ = MI_USER_INTERRUPT;
4907
4908         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4909         if (intel_engine_has_semaphores(request->engine))
4910                 cs = gen12_emit_preempt_busywait(request, cs);
4911
4912         request->tail = intel_ring_offset(request, cs);
4913         assert_ring_tail_valid(request->ring, request->tail);
4914
4915         return gen8_emit_wa_tail(request, cs);
4916 }
4917
4918 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4919 {
4920         return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4921 }
4922
4923 static u32 *
4924 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4925 {
4926         cs = gen12_emit_ggtt_write_rcs(cs,
4927                                        request->fence.seqno,
4928                                        i915_request_active_timeline(request)->hwsp_offset,
4929                                        PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4930                                        PIPE_CONTROL_CS_STALL |
4931                                        PIPE_CONTROL_TILE_CACHE_FLUSH |
4932                                        PIPE_CONTROL_FLUSH_L3 |
4933                                        PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4934                                        PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4935                                        /* Wa_1409600907:tgl */
4936                                        PIPE_CONTROL_DEPTH_STALL |
4937                                        PIPE_CONTROL_DC_FLUSH_ENABLE |
4938                                        PIPE_CONTROL_FLUSH_ENABLE);
4939
4940         return gen12_emit_fini_breadcrumb_tail(request, cs);
4941 }
4942
4943 static void execlists_park(struct intel_engine_cs *engine)
4944 {
4945         cancel_timer(&engine->execlists.timer);
4946         cancel_timer(&engine->execlists.preempt);
4947 }
4948
4949 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4950 {
4951         engine->submit_request = execlists_submit_request;
4952         engine->schedule = i915_schedule;
4953         engine->execlists.tasklet.func = execlists_submission_tasklet;
4954
4955         engine->reset.prepare = execlists_reset_prepare;
4956         engine->reset.rewind = execlists_reset_rewind;
4957         engine->reset.cancel = execlists_reset_cancel;
4958         engine->reset.finish = execlists_reset_finish;
4959
4960         engine->park = execlists_park;
4961         engine->unpark = NULL;
4962
4963         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4964         if (!intel_vgpu_active(engine->i915)) {
4965                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4966                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
4967                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4968                         if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
4969                                 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4970                 }
4971         }
4972
4973         if (INTEL_GEN(engine->i915) >= 12)
4974                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4975
4976         if (intel_engine_has_preemption(engine))
4977                 engine->emit_bb_start = gen8_emit_bb_start;
4978         else
4979                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4980 }
4981
4982 static void execlists_shutdown(struct intel_engine_cs *engine)
4983 {
4984         /* Synchronise with residual timers and any softirq they raise */
4985         del_timer_sync(&engine->execlists.timer);
4986         del_timer_sync(&engine->execlists.preempt);
4987         tasklet_kill(&engine->execlists.tasklet);
4988 }
4989
4990 static void execlists_release(struct intel_engine_cs *engine)
4991 {
4992         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4993
4994         execlists_shutdown(engine);
4995
4996         intel_engine_cleanup_common(engine);
4997         lrc_destroy_wa_ctx(engine);
4998 }
4999
5000 static void
5001 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5002 {
5003         /* Default vfuncs which can be overridden by each engine. */
5004
5005         engine->resume = execlists_resume;
5006
5007         engine->cops = &execlists_context_ops;
5008         engine->request_alloc = execlists_request_alloc;
5009
5010         engine->emit_flush = gen8_emit_flush;
5011         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5012         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5013         if (INTEL_GEN(engine->i915) >= 12) {
5014                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5015                 engine->emit_flush = gen12_emit_flush;
5016         }
5017         engine->set_default_submission = intel_execlists_set_default_submission;
5018
5019         if (INTEL_GEN(engine->i915) < 11) {
5020                 engine->irq_enable = gen8_logical_ring_enable_irq;
5021                 engine->irq_disable = gen8_logical_ring_disable_irq;
5022         } else {
5023                 /*
5024                  * TODO: On Gen11 interrupt masks need to be clear
5025                  * to allow C6 entry. Keep interrupts enabled at all
5026                  * times and take the hit of generating extra interrupts
5027                  * until a more refined solution exists.
5028                  */
5029         }
5030 }
5031
5032 static inline void
5033 logical_ring_default_irqs(struct intel_engine_cs *engine)
5034 {
5035         unsigned int shift = 0;
5036
5037         if (INTEL_GEN(engine->i915) < 11) {
5038                 const u8 irq_shifts[] = {
5039                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
5040                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
5041                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5042                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5043                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
5044                 };
5045
5046                 shift = irq_shifts[engine->id];
5047         }
5048
5049         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5050         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5051         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5052         engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5053 }
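
/*
 * The shift above mirrors the GT interrupt layout: on gen8-gen10 several
 * engines share an IIR, so each engine's user-interrupt, context-switch
 * and CS-error bits live at a per-engine offset within it, whereas from
 * gen11 onwards the interrupt bits are reported per engine and the
 * unshifted positions are used directly (shift == 0).
 */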
5054
5055 static void rcs_submission_override(struct intel_engine_cs *engine)
5056 {
5057         switch (INTEL_GEN(engine->i915)) {
5058         case 12:
5059                 engine->emit_flush = gen12_emit_flush_render;
5060                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5061                 break;
5062         case 11:
5063                 engine->emit_flush = gen11_emit_flush_render;
5064                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5065                 break;
5066         default:
5067                 engine->emit_flush = gen8_emit_flush_render;
5068                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5069                 break;
5070         }
5071 }
5072
5073 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5074 {
5075         struct intel_engine_execlists * const execlists = &engine->execlists;
5076         struct drm_i915_private *i915 = engine->i915;
5077         struct intel_uncore *uncore = engine->uncore;
5078         u32 base = engine->mmio_base;
5079
5080         tasklet_init(&engine->execlists.tasklet,
5081                      execlists_submission_tasklet, (unsigned long)engine);
5082         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5083         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5084
5085         logical_ring_default_vfuncs(engine);
5086         logical_ring_default_irqs(engine);
5087
5088         if (engine->class == RENDER_CLASS)
5089                 rcs_submission_override(engine);
5090
5091         if (intel_init_workaround_bb(engine))
5092                 /*
5093                  * We continue even if we fail to initialize the WA batch
5094                  * because we only expect rare glitches, nothing critical
5095                  * enough to prevent us from using the GPU.
5096                  */
5097                 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5098
5099         if (HAS_LOGICAL_RING_ELSQ(i915)) {
5100                 execlists->submit_reg = uncore->regs +
5101                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5102                 execlists->ctrl_reg = uncore->regs +
5103                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5104         } else {
5105                 execlists->submit_reg = uncore->regs +
5106                         i915_mmio_reg_offset(RING_ELSP(base));
5107         }
5108
5109         execlists->csb_status =
5110                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5111
5112         execlists->csb_write =
5113                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5114
5115         if (INTEL_GEN(i915) < 11)
5116                 execlists->csb_size = GEN8_CSB_ENTRIES;
5117         else
5118                 execlists->csb_size = GEN11_CSB_ENTRIES;
5119
5120         if (INTEL_GEN(engine->i915) >= 11) {
5121                 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5122                 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5123         }
5124
5125         /* Finally, take ownership and responsibility for cleanup! */
5126         engine->sanitize = execlists_sanitize;
5127         engine->release = execlists_release;
5128
5129         return 0;
5130 }
5131
5132 static void init_common_reg_state(u32 * const regs,
5133                                   const struct intel_engine_cs *engine,
5134                                   const struct intel_ring *ring,
5135                                   bool inhibit)
5136 {
5137         u32 ctl;
5138
5139         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5140         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5141         if (inhibit)
5142                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5143         if (INTEL_GEN(engine->i915) < 11)
5144                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5145                                            CTX_CTRL_RS_CTX_ENABLE);
5146         regs[CTX_CONTEXT_CONTROL] = ctl;
5147
5148         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5149         regs[CTX_TIMESTAMP] = 0;
5150 }
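
/*
 * CTX_CONTEXT_CONTROL is likewise a masked register, so the _MASKED_BIT_*
 * helpers above set both the value and its write-enable bit. When
 * 'inhibit' is true we additionally flag CTX_RESTORE_INHIBIT so that the
 * first load of an uninitialised image does not restore garbage state;
 * populate_lr_context() only clears 'inhibit' once the engine's default
 * state has been copied into the image.
 */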
5151
5152 static void init_wa_bb_reg_state(u32 * const regs,
5153                                  const struct intel_engine_cs *engine)
5154 {
5155         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5156
5157         if (wa_ctx->per_ctx.size) {
5158                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5159
5160                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5161                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5162                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5163         }
5164
5165         if (wa_ctx->indirect_ctx.size) {
5166                 lrc_ring_setup_indirect_ctx(regs, engine,
5167                                             i915_ggtt_offset(wa_ctx->vma) +
5168                                             wa_ctx->indirect_ctx.offset,
5169                                             wa_ctx->indirect_ctx.size);
5170         }
5171 }
5172
5173 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5174 {
5175         if (i915_vm_is_4lvl(&ppgtt->vm)) {
5176                 /* 64b PPGTT (48bit canonical)
5177                  * PDP0_DESCRIPTOR contains the base address to PML4 and
5178                  * other PDP Descriptors are ignored.
5179                  */
5180                 ASSIGN_CTX_PML4(ppgtt, regs);
5181         } else {
5182                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5183                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5184                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5185                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5186         }
5187 }
5188
5189 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5190 {
5191         if (i915_is_ggtt(vm))
5192                 return i915_vm_to_ggtt(vm)->alias;
5193         else
5194                 return i915_vm_to_ppgtt(vm);
5195 }
5196
5197 static void execlists_init_reg_state(u32 *regs,
5198                                      const struct intel_context *ce,
5199                                      const struct intel_engine_cs *engine,
5200                                      const struct intel_ring *ring,
5201                                      bool inhibit)
5202 {
5203         /*
5204          * A context is actually a big batch buffer with several
5205          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5206          * values we are setting here are only for the first context restore:
5207          * on a subsequent save, the GPU will recreate this batchbuffer with new
5208          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5209          * we are not initializing here).
5210          *
5211          * Must keep consistent with virtual_update_register_offsets().
5212          */
5213         set_offsets(regs, reg_offsets(engine), engine, inhibit);
5214
5215         init_common_reg_state(regs, engine, ring, inhibit);
5216         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5217
5218         init_wa_bb_reg_state(regs, engine);
5219
5220         __reset_stop_ring(regs, engine);
5221 }
5222
5223 static int
5224 populate_lr_context(struct intel_context *ce,
5225                     struct drm_i915_gem_object *ctx_obj,
5226                     struct intel_engine_cs *engine,
5227                     struct intel_ring *ring)
5228 {
5229         bool inhibit = true;
5230         void *vaddr;
5231
5232         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5233         if (IS_ERR(vaddr)) {
5234                 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5235                 return PTR_ERR(vaddr);
5236         }
5237
5238         set_redzone(vaddr, engine);
5239
5240         if (engine->default_state) {
5241                 shmem_read(engine->default_state, 0,
5242                            vaddr, engine->context_size);
5243                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5244                 inhibit = false;
5245         }
5246
5247         /* Clear the ppHWSP (inc. per-context counters) */
5248         memset(vaddr, 0, PAGE_SIZE);
5249
5250         /*
5251          * The second page of the context object contains some registers which
5252          * must be set up prior to the first execution.
5253          */
5254         execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5255                                  ce, engine, ring, inhibit);
5256
5257         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5258         i915_gem_object_unpin_map(ctx_obj);
5259         return 0;
5260 }
5261
5262 static int __execlists_context_alloc(struct intel_context *ce,
5263                                      struct intel_engine_cs *engine)
5264 {
5265         struct drm_i915_gem_object *ctx_obj;
5266         struct intel_ring *ring;
5267         struct i915_vma *vma;
5268         u32 context_size;
5269         int ret;
5270
5271         GEM_BUG_ON(ce->state);
5272         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5273
5274         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5275                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5276
5277         if (INTEL_GEN(engine->i915) == 12) {
5278                 ce->wa_bb_page = context_size / PAGE_SIZE;
5279                 context_size += PAGE_SIZE;
5280         }
5281
5282         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5283         if (IS_ERR(ctx_obj))
5284                 return PTR_ERR(ctx_obj);
5285
5286         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5287         if (IS_ERR(vma)) {
5288                 ret = PTR_ERR(vma);
5289                 goto error_deref_obj;
5290         }
5291
5292         if (!ce->timeline) {
5293                 struct intel_timeline *tl;
5294                 struct i915_vma *hwsp;
5295
5296                 /*
5297                  * Use the static global HWSP for the kernel context, and
5298                  * a dynamically allocated cacheline for everyone else.
5299                  */
5300                 hwsp = NULL;
5301                 if (unlikely(intel_context_is_barrier(ce)))
5302                         hwsp = engine->status_page.vma;
5303
5304                 tl = intel_timeline_create(engine->gt, hwsp);
5305                 if (IS_ERR(tl)) {
5306                         ret = PTR_ERR(tl);
5307                         goto error_deref_obj;
5308                 }
5309
5310                 ce->timeline = tl;
5311         }
5312
5313         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5314         if (IS_ERR(ring)) {
5315                 ret = PTR_ERR(ring);
5316                 goto error_deref_obj;
5317         }
5318
5319         ret = populate_lr_context(ce, ctx_obj, engine, ring);
5320         if (ret) {
5321                 drm_dbg(&engine->i915->drm,
5322                         "Failed to populate LRC: %d\n", ret);
5323                 goto error_ring_free;
5324         }
5325
5326         ce->ring = ring;
5327         ce->state = vma;
5328
5329         return 0;
5330
5331 error_ring_free:
5332         intel_ring_put(ring);
5333 error_deref_obj:
5334         i915_gem_object_put(ctx_obj);
5335         return ret;
5336 }
5337
5338 static struct list_head *virtual_queue(struct virtual_engine *ve)
5339 {
5340         return &ve->base.execlists.default_priolist.requests[0];
5341 }
5342
5343 static void virtual_context_destroy(struct kref *kref)
5344 {
5345         struct virtual_engine *ve =
5346                 container_of(kref, typeof(*ve), context.ref);
5347         unsigned int n;
5348
5349         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5350         GEM_BUG_ON(ve->request);
5351         GEM_BUG_ON(ve->context.inflight);
5352
5353         for (n = 0; n < ve->num_siblings; n++) {
5354                 struct intel_engine_cs *sibling = ve->siblings[n];
5355                 struct rb_node *node = &ve->nodes[sibling->id].rb;
5356                 unsigned long flags;
5357
5358                 if (RB_EMPTY_NODE(node))
5359                         continue;
5360
5361                 spin_lock_irqsave(&sibling->active.lock, flags);
5362
5363                 /* Detachment is lazily performed in the execlists tasklet */
5364                 if (!RB_EMPTY_NODE(node))
5365                         rb_erase_cached(node, &sibling->execlists.virtual);
5366
5367                 spin_unlock_irqrestore(&sibling->active.lock, flags);
5368         }
5369         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5370
5371         if (ve->context.state)
5372                 __execlists_context_fini(&ve->context);
5373         intel_context_fini(&ve->context);
5374
5375         intel_engine_free_request_pool(&ve->base);
5376
5377         kfree(ve->bonds);
5378         kfree(ve);
5379 }
5380
5381 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5382 {
5383         int swp;
5384
5385         /*
5386          * Pick a random sibling on starting to help spread the load around.
5387          *
5388          * New contexts are typically created with exactly the same order
5389          * of siblings, and often started in batches. Due to the way we iterate
5390          * the array of siblings when submitting requests, sibling[0] is
5391          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5392          * randomised across the system, we also help spread the load by the
5393          * first engine we inspect being different each time.
5394          *
5395          * NB This does not force us to execute on this engine; it will just
5396          * typically be the first we inspect for submission.
5397          */
5398         swp = prandom_u32_max(ve->num_siblings);
5399         if (!swp)
5400                 return;
5401
5402         swap(ve->siblings[swp], ve->siblings[0]);
5403         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
5404                 virtual_update_register_offsets(ve->context.lrc_reg_state,
5405                                                 ve->siblings[0]);
5406 }
5407
5408 static int virtual_context_alloc(struct intel_context *ce)
5409 {
5410         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5411
5412         return __execlists_context_alloc(ce, ve->siblings[0]);
5413 }
5414
5415 static int virtual_context_pin(struct intel_context *ce)
5416 {
5417         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5418         int err;
5419
5420         /* Note: we must use a real engine class for setting up reg state */
5421         err = __execlists_context_pin(ce, ve->siblings[0]);
5422         if (err)
5423                 return err;
5424
5425         virtual_engine_initial_hint(ve);
5426         return 0;
5427 }
5428
5429 static void virtual_context_enter(struct intel_context *ce)
5430 {
5431         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5432         unsigned int n;
5433
5434         for (n = 0; n < ve->num_siblings; n++)
5435                 intel_engine_pm_get(ve->siblings[n]);
5436
5437         intel_timeline_enter(ce->timeline);
5438 }
5439
5440 static void virtual_context_exit(struct intel_context *ce)
5441 {
5442         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5443         unsigned int n;
5444
5445         intel_timeline_exit(ce->timeline);
5446
5447         for (n = 0; n < ve->num_siblings; n++)
5448                 intel_engine_pm_put(ve->siblings[n]);
5449 }
5450
5451 static const struct intel_context_ops virtual_context_ops = {
5452         .alloc = virtual_context_alloc,
5453
5454         .pin = virtual_context_pin,
5455         .unpin = execlists_context_unpin,
5456
5457         .enter = virtual_context_enter,
5458         .exit = virtual_context_exit,
5459
5460         .destroy = virtual_context_destroy,
5461 };
5462
5463 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5464 {
5465         struct i915_request *rq;
5466         intel_engine_mask_t mask;
5467
5468         rq = READ_ONCE(ve->request);
5469         if (!rq)
5470                 return 0;
5471
5472         /* The rq is ready for submission; rq->execution_mask is now stable. */
5473         mask = rq->execution_mask;
5474         if (unlikely(!mask)) {
5475                 /* Invalid selection, submit to a random engine in error */
5476                 i915_request_set_error_once(rq, -ENODEV);
5477                 mask = ve->siblings[0]->mask;
5478         }
5479
5480         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5481                      rq->fence.context, rq->fence.seqno,
5482                      mask, ve->base.execlists.queue_priority_hint);
5483
5484         return mask;
5485 }
5486
5487 static void virtual_submission_tasklet(unsigned long data)
5488 {
5489         struct virtual_engine * const ve = (struct virtual_engine *)data;
5490         const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5491         intel_engine_mask_t mask;
5492         unsigned int n;
5493
5494         rcu_read_lock();
5495         mask = virtual_submission_mask(ve);
5496         rcu_read_unlock();
5497         if (unlikely(!mask))
5498                 return;
5499
5500         local_irq_disable();
5501         for (n = 0; n < ve->num_siblings; n++) {
5502                 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5503                 struct ve_node * const node = &ve->nodes[sibling->id];
5504                 struct rb_node **parent, *rb;
5505                 bool first;
5506
5507                 if (!READ_ONCE(ve->request))
5508                         break; /* already handled by a sibling's tasklet */
5509
5510                 if (unlikely(!(mask & sibling->mask))) {
5511                         if (!RB_EMPTY_NODE(&node->rb)) {
5512                                 spin_lock(&sibling->active.lock);
5513                                 rb_erase_cached(&node->rb,
5514                                                 &sibling->execlists.virtual);
5515                                 RB_CLEAR_NODE(&node->rb);
5516                                 spin_unlock(&sibling->active.lock);
5517                         }
5518                         continue;
5519                 }
5520
5521                 spin_lock(&sibling->active.lock);
5522
5523                 if (!RB_EMPTY_NODE(&node->rb)) {
5524                         /*
5525                          * Cheat and avoid rebalancing the tree if we can
5526                          * reuse this node in situ.
5527                          */
5528                         first = rb_first_cached(&sibling->execlists.virtual) ==
5529                                 &node->rb;
5530                         if (prio == node->prio || (prio > node->prio && first))
5531                                 goto submit_engine;
5532
5533                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5534                 }
5535
5536                 rb = NULL;
5537                 first = true;
5538                 parent = &sibling->execlists.virtual.rb_root.rb_node;
5539                 while (*parent) {
5540                         struct ve_node *other;
5541
5542                         rb = *parent;
5543                         other = rb_entry(rb, typeof(*other), rb);
5544                         if (prio > other->prio) {
5545                                 parent = &rb->rb_left;
5546                         } else {
5547                                 parent = &rb->rb_right;
5548                                 first = false;
5549                         }
5550                 }
5551
5552                 rb_link_node(&node->rb, rb, parent);
5553                 rb_insert_color_cached(&node->rb,
5554                                        &sibling->execlists.virtual,
5555                                        first);
5556
5557 submit_engine:
5558                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5559                 node->prio = prio;
5560                 if (first && prio > sibling->execlists.queue_priority_hint)
5561                         tasklet_hi_schedule(&sibling->execlists.tasklet);
5562
5563                 spin_unlock(&sibling->active.lock);
5564         }
5565         local_irq_enable();
5566 }
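
/*
 * In short, the tasklet above advertises the single queued virtual
 * request to every sibling allowed by its execution mask: each eligible
 * sibling gets the ve_node (re)inserted into its execlists.virtual rbtree
 * keyed on priority, and that sibling's own submission tasklet is kicked
 * only if our node lands at the front of the tree with a priority above
 * its current queue_priority_hint. If one sibling has already picked the
 * request up (ve->request reads back NULL), the loop stops early.
 */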
5567
5568 static void virtual_submit_request(struct i915_request *rq)
5569 {
5570         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5571         struct i915_request *old;
5572         unsigned long flags;
5573
5574         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5575                      rq->fence.context,
5576                      rq->fence.seqno);
5577
5578         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5579
5580         spin_lock_irqsave(&ve->base.active.lock, flags);
5581
5582         old = ve->request;
5583         if (old) { /* background completion event from preempt-to-busy */
5584                 GEM_BUG_ON(!i915_request_completed(old));
5585                 __i915_request_submit(old);
5586                 i915_request_put(old);
5587         }
5588
5589         if (i915_request_completed(rq)) {
5590                 __i915_request_submit(rq);
5591
5592                 ve->base.execlists.queue_priority_hint = INT_MIN;
5593                 ve->request = NULL;
5594         } else {
5595                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5596                 ve->request = i915_request_get(rq);
5597
5598                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5599                 list_move_tail(&rq->sched.link, virtual_queue(ve));
5600
5601                 tasklet_schedule(&ve->base.execlists.tasklet);
5602         }
5603
5604         spin_unlock_irqrestore(&ve->base.active.lock, flags);
5605 }
5606
5607 static struct ve_bond *
5608 virtual_find_bond(struct virtual_engine *ve,
5609                   const struct intel_engine_cs *master)
5610 {
5611         int i;
5612
5613         for (i = 0; i < ve->num_bonds; i++) {
5614                 if (ve->bonds[i].master == master)
5615                         return &ve->bonds[i];
5616         }
5617
5618         return NULL;
5619 }
5620
5621 static void
5622 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5623 {
5624         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5625         intel_engine_mask_t allowed, exec;
5626         struct ve_bond *bond;
5627
5628         allowed = ~to_request(signal)->engine->mask;
5629
5630         bond = virtual_find_bond(ve, to_request(signal)->engine);
5631         if (bond)
5632                 allowed &= bond->sibling_mask;
5633
5634         /* Restrict the bonded request to run on only the available engines */
5635         exec = READ_ONCE(rq->execution_mask);
5636         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5637                 ;
5638
5639         /* Prevent the master from being re-run on the bonded engines */
5640         to_request(signal)->execution_mask &= ~allowed;
5641 }
5642
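     /*
      * Create a virtual engine spanning @count physical @siblings. A single
      * sibling collapses to a plain context on that engine; otherwise we
      * allocate a struct virtual_engine, install the virtual submission
      * hooks and inherit the ring emission vfuncs from the siblings, which
      * must all belong to the same engine class.
      */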
5643 struct intel_context *
5644 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5645                                unsigned int count)
5646 {
5647         struct virtual_engine *ve;
5648         unsigned int n;
5649         int err;
5650
5651         if (count == 0)
5652                 return ERR_PTR(-EINVAL);
5653
5654         if (count == 1)
5655                 return intel_context_create(siblings[0]);
5656
5657         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5658         if (!ve)
5659                 return ERR_PTR(-ENOMEM);
5660
5661         ve->base.i915 = siblings[0]->i915;
5662         ve->base.gt = siblings[0]->gt;
5663         ve->base.uncore = siblings[0]->uncore;
5664         ve->base.id = -1;
5665
5666         ve->base.class = OTHER_CLASS;
5667         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5668         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5669         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5670
5671         /*
5672          * The decision on whether to submit a request using semaphores
5673          * depends on the saturated state of the engine. We only compute
5674          * this during HW submission of the request, and we need this
5675          * state to be globally applied to all requests being submitted
5676          * to this engine. Virtual engines encompass more than one physical
5677          * engine and so we cannot accurately tell in advance if one of those
5678          * engines is already saturated and so cannot afford to use a semaphore
5679          * and be pessimized in priority for doing so -- if we are the only
5680          * context using semaphores after all other clients have stopped, we
5681          * will be starved on the saturated system. Such a global switch for
5682          * semaphores is less than ideal, but alas is the current compromise.
5683          */
5684         ve->base.saturated = ALL_ENGINES;
5685
5686         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5687
5688         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5689         intel_engine_init_breadcrumbs(&ve->base);
5690         intel_engine_init_execlists(&ve->base);
5691
5692         ve->base.cops = &virtual_context_ops;
5693         ve->base.request_alloc = execlists_request_alloc;
5694
5695         ve->base.schedule = i915_schedule;
5696         ve->base.submit_request = virtual_submit_request;
5697         ve->base.bond_execute = virtual_bond_execute;
5698
5699         INIT_LIST_HEAD(virtual_queue(ve));
5700         ve->base.execlists.queue_priority_hint = INT_MIN;
5701         tasklet_init(&ve->base.execlists.tasklet,
5702                      virtual_submission_tasklet,
5703                      (unsigned long)ve);
5704
5705         intel_context_init(&ve->context, &ve->base);
5706
5707         for (n = 0; n < count; n++) {
5708                 struct intel_engine_cs *sibling = siblings[n];
5709
5710                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5711                 if (sibling->mask & ve->base.mask) {
5712                         DRM_DEBUG("duplicate %s entry in load balancer\n",
5713                                   sibling->name);
5714                         err = -EINVAL;
5715                         goto err_put;
5716                 }
5717
5718                 /*
5719                  * The virtual engine implementation is tightly coupled to
5720                  * the execlists backend -- we push out requests directly
5721                  * into a tree inside each physical engine. We could support
5722                  * layering if we handled cloning of the requests and
5723                  * submitted a copy into each backend.
5724                  */
5725                 if (sibling->execlists.tasklet.func !=
5726                     execlists_submission_tasklet) {
5727                         err = -ENODEV;
5728                         goto err_put;
5729                 }
5730
5731                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5732                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5733
5734                 ve->siblings[ve->num_siblings++] = sibling;
5735                 ve->base.mask |= sibling->mask;
5736
5737                 /*
5738                  * All physical engines must have compatible emission
5739                  * functions (as we build the instructions during request
5740                  * construction and do not alter them before submission
5741                  * on the physical engine). We use the engine class as a guide
5742                  * here, although that could be refined.
5743                  */
5744                 if (ve->base.class != OTHER_CLASS) {
5745                         if (ve->base.class != sibling->class) {
5746                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5747                                           sibling->class, ve->base.class);
5748                                 err = -EINVAL;
5749                                 goto err_put;
5750                         }
5751                         continue;
5752                 }
5753
5754                 ve->base.class = sibling->class;
5755                 ve->base.uabi_class = sibling->uabi_class;
5756                 snprintf(ve->base.name, sizeof(ve->base.name),
5757                          "v%dx%d", ve->base.class, count);
5758                 ve->base.context_size = sibling->context_size;
5759
5760                 ve->base.emit_bb_start = sibling->emit_bb_start;
5761                 ve->base.emit_flush = sibling->emit_flush;
5762                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5763                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5764                 ve->base.emit_fini_breadcrumb_dw =
5765                         sibling->emit_fini_breadcrumb_dw;
5766
5767                 ve->base.flags = sibling->flags;
5768         }
5769
5770         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5771
5772         return &ve->context;
5773
5774 err_put:
5775         intel_context_put(&ve->context);
5776         return ERR_PTR(err);
5777 }
5778
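     /*
      * Clone an existing virtual engine: create a new virtual context over
      * the same set of siblings and copy across any bonds attached to the
      * source engine.
      */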
5779 struct intel_context *
5780 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5781 {
5782         struct virtual_engine *se = to_virtual_engine(src);
5783         struct intel_context *dst;
5784
5785         dst = intel_execlists_create_virtual(se->siblings,
5786                                              se->num_siblings);
5787         if (IS_ERR(dst))
5788                 return dst;
5789
5790         if (se->num_bonds) {
5791                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5792
5793                 de->bonds = kmemdup(se->bonds,
5794                                     sizeof(*se->bonds) * se->num_bonds,
5795                                     GFP_KERNEL);
5796                 if (!de->bonds) {
5797                         intel_context_put(dst);
5798                         return ERR_PTR(-ENOMEM);
5799                 }
5800
5801                 de->num_bonds = se->num_bonds;
5802         }
5803
5804         return dst;
5805 }
5806
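     /*
      * Attach (or extend) a bond so that requests paired with @master may
      * only be placed on @sibling; virtual_bond_execute() consults this
      * table to narrow the bonded request's execution_mask. The sibling
      * must already be part of the virtual engine.
      */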
5807 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5808                                      const struct intel_engine_cs *master,
5809                                      const struct intel_engine_cs *sibling)
5810 {
5811         struct virtual_engine *ve = to_virtual_engine(engine);
5812         struct ve_bond *bond;
5813         int n;
5814
5815         /* Sanity check that the sibling is part of the virtual engine */
5816         for (n = 0; n < ve->num_siblings; n++)
5817                 if (sibling == ve->siblings[n])
5818                         break;
5819         if (n == ve->num_siblings)
5820                 return -EINVAL;
5821
5822         bond = virtual_find_bond(ve, master);
5823         if (bond) {
5824                 bond->sibling_mask |= sibling->mask;
5825                 return 0;
5826         }
5827
5828         bond = krealloc(ve->bonds,
5829                         sizeof(*bond) * (ve->num_bonds + 1),
5830                         GFP_KERNEL);
5831         if (!bond)
5832                 return -ENOMEM;
5833
5834         bond[ve->num_bonds].master = master;
5835         bond[ve->num_bonds].sibling_mask = sibling->mask;
5836
5837         ve->bonds = bond;
5838         ve->num_bonds++;
5839
5840         return 0;
5841 }
5842
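     /*
      * Return the nth physical sibling backing a virtual engine, or NULL
      * if the index is out of range.
      */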
5843 struct intel_engine_cs *
5844 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5845                                  unsigned int sibling)
5846 {
5847         struct virtual_engine *ve = to_virtual_engine(engine);
5848
5849         if (sibling >= ve->num_siblings)
5850                 return NULL;
5851
5852         return ve->siblings[sibling];
5853 }
5854
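     /*
      * Dump up to @max requests from each of the engine's lists into the
      * drm_printer: executing requests (E), the priority-sorted submission
      * queue (Q) and requests waiting on virtual engines (V), eliding the
      * middle of a list when it exceeds the limit.
      */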
5855 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5856                                    struct drm_printer *m,
5857                                    void (*show_request)(struct drm_printer *m,
5858                                                         struct i915_request *rq,
5859                                                         const char *prefix),
5860                                    unsigned int max)
5861 {
5862         const struct intel_engine_execlists *execlists = &engine->execlists;
5863         struct i915_request *rq, *last;
5864         unsigned long flags;
5865         unsigned int count;
5866         struct rb_node *rb;
5867
5868         spin_lock_irqsave(&engine->active.lock, flags);
5869
5870         last = NULL;
5871         count = 0;
5872         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5873                 if (count++ < max - 1)
5874                         show_request(m, rq, "\t\tE ");
5875                 else
5876                         last = rq;
5877         }
5878         if (last) {
5879                 if (count > max) {
5880                         drm_printf(m,
5881                                    "\t\t...skipping %d executing requests...\n",
5882                                    count - max);
5883                 }
5884                 show_request(m, last, "\t\tE ");
5885         }
5886
5887         if (execlists->switch_priority_hint != INT_MIN)
5888                 drm_printf(m, "\t\tSwitch priority hint: %d\n",
5889                            READ_ONCE(execlists->switch_priority_hint));
5890         if (execlists->queue_priority_hint != INT_MIN)
5891                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5892                            READ_ONCE(execlists->queue_priority_hint));
5893
5894         last = NULL;
5895         count = 0;
5896         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5897                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5898                 int i;
5899
5900                 priolist_for_each_request(rq, p, i) {
5901                         if (count++ < max - 1)
5902                                 show_request(m, rq, "\t\tQ ");
5903                         else
5904                                 last = rq;
5905                 }
5906         }
5907         if (last) {
5908                 if (count > max) {
5909                         drm_printf(m,
5910                                    "\t\t...skipping %d queued requests...\n",
5911                                    count - max);
5912                 }
5913                 show_request(m, last, "\t\tQ ");
5914         }
5915
5916         last = NULL;
5917         count = 0;
5918         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5919                 struct virtual_engine *ve =
5920                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5921                 struct i915_request *rq = READ_ONCE(ve->request);
5922
5923                 if (rq) {
5924                         if (count++ < max - 1)
5925                                 show_request(m, rq, "\t\tV ");
5926                         else
5927                                 last = rq;
5928                 }
5929         }
5930         if (last) {
5931                 if (count > max) {
5932                         drm_printf(m,
5933                                    "\t\t...skipping %d virtual requests...\n",
5934                                    count - max);
5935                 }
5936                 show_request(m, last, "\t\tV ");
5937         }
5938
5939         spin_unlock_irqrestore(&engine->active.lock, flags);
5940 }
5941
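     /*
      * Reset a pinned logical ring context after a hang: optionally scrub
      * the context image back to the default state, then rewind the ring
      * registers to @head so the request can be rerun.
      */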
5942 void intel_lr_context_reset(struct intel_engine_cs *engine,
5943                             struct intel_context *ce,
5944                             u32 head,
5945                             bool scrub)
5946 {
5947         GEM_BUG_ON(!intel_context_is_pinned(ce));
5948
5949         /*
5950          * We want a simple context + ring to execute the breadcrumb update.
5951          * We cannot rely on the context being intact across the GPU hang,
5952          * so clear it and rebuild just what we need for the breadcrumb.
5953          * All pending requests for this context will be zapped, and any
5954          * future request will arrive after userspace has had the opportunity
5955          * to recreate its own state.
5956          */
5957         if (scrub)
5958                 restore_default_state(ce, engine);
5959
5960         /* Rerun the request; its payload has been neutered (if guilty). */
5961         __execlists_update_reg_state(ce, engine, head);
5962 }
5963
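     /*
      * Report whether the engine is using the execlists submission backend,
      * identified by the set_default_submission hook that is installed.
      */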
5964 bool
5965 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5966 {
5967         return engine->set_default_submission ==
5968                intel_execlists_set_default_submission;
5969 }
5970
5971 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5972 #include "selftest_lrc.c"
5973 #endif