drivers/gpu/drm/i915/gt/selftest_lrc.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2018 Intel Corporation
   5  */
   6
   7 #include <linux/prime_numbers.h>
   8
   9 #include "gem/i915_gem_pm.h"
  10 #include "gt/intel_engine_heartbeat.h"
  11 #include "gt/intel_reset.h"
  12
  13 #include "i915_selftest.h"
  14 #include "selftests/i915_random.h"
  15 #include "selftests/igt_flush_test.h"
  16 #include "selftests/igt_live_test.h"
  17 #include "selftests/igt_spinner.h"
  18 #include "selftests/lib_sw_fence.h"
  19
  20 #include "gem/selftests/igt_gem_utils.h"
  21 #include "gem/selftests/mock_context.h"
  22
  23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
  24 #define NUM_GPR 16
  25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
  26
  27 static struct i915_vma *create_scratch(struct intel_gt *gt)
  28 {
  29         struct drm_i915_gem_object *obj;
  30         struct i915_vma *vma;
  31         int err;
  32
  33         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
  34         if (IS_ERR(obj))
  35                 return ERR_CAST(obj);
  36
  37         i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
  38
  39         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
  40         if (IS_ERR(vma)) {
  41                 i915_gem_object_put(obj);
  42                 return vma;
  43         }
  44
  45         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
  46         if (err) {
  47                 i915_gem_object_put(obj);
  48                 return ERR_PTR(err);
  49         }
  50
  51         return vma;
  52 }
  53
  54 static void engine_heartbeat_disable(struct intel_engine_cs *engine)
  55 {
  56         engine->props.heartbeat_interval_ms = 0;
  57
  58         intel_engine_pm_get(engine);
  59         intel_engine_park_heartbeat(engine);
  60 }
  61
  62 static void engine_heartbeat_enable(struct intel_engine_cs *engine)
  63 {
  64         intel_engine_pm_put(engine);
  65
  66         engine->props.heartbeat_interval_ms =
  67                 engine->defaults.heartbeat_interval_ms;
  68 }
  69
  70 static bool is_active(struct i915_request *rq)
  71 {
  72         if (i915_request_is_active(rq))
  73                 return true;
  74
  75         if (i915_request_on_hold(rq))
  76                 return true;
  77
  78         if (i915_request_started(rq))
  79                 return true;
  80
  81         return false;
  82 }
  83
  84 static int wait_for_submit(struct intel_engine_cs *engine,
  85                            struct i915_request *rq,
  86                            unsigned long timeout)
  87 {
  88         timeout += jiffies;
  89         do {
  90                 bool done = time_after(jiffies, timeout);
  91
  92                 if (i915_request_completed(rq)) /* that was quick! */
  93                         return 0;
  94
  95                 /* Wait until the HW has acknowleged the submission (or err) */
  96                 intel_engine_flush_submission(engine);
  97                 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
  98                         return 0;
  99
 100                 if (done)
 101                         return -ETIME;
 102
 103                 cond_resched();
 104         } while (1);
 105 }
 106
 107 static int wait_for_reset(struct intel_engine_cs *engine,
 108                           struct i915_request *rq,
 109                           unsigned long timeout)
 110 {
 111         timeout += jiffies;
 112
 113         do {
 114                 cond_resched();
 115                 intel_engine_flush_submission(engine);
 116
 117                 if (READ_ONCE(engine->execlists.pending[0]))
 118                         continue;
 119
 120                 if (i915_request_completed(rq))
 121                         break;
 122
 123                 if (READ_ONCE(rq->fence.error))
 124                         break;
 125         } while (time_before(jiffies, timeout));
 126
 127         flush_scheduled_work();
 128
 129         if (rq->fence.error != -EIO) {
 130                 pr_err("%s: hanging request %llx:%lld not reset\n",
 131                        engine->name,
 132                        rq->fence.context,
 133                        rq->fence.seqno);
 134                 return -EINVAL;
 135         }
 136
 137         /* Give the request a jiffie to complete after flushing the worker */
 138         if (i915_request_wait(rq, 0,
 139                               max(0l, (long)(timeout - jiffies)) + 1) < 0) {
 140                 pr_err("%s: hanging request %llx:%lld did not complete\n",
 141                        engine->name,
 142                        rq->fence.context,
 143                        rq->fence.seqno);
 144                 return -ETIME;
 145         }
 146
 147         return 0;
 148 }
 149
 150 static int live_sanitycheck(void *arg)
 151 {
 152         struct intel_gt *gt = arg;
 153         struct intel_engine_cs *engine;
 154         enum intel_engine_id id;
 155         struct igt_spinner spin;
 156         int err = 0;
 157
 158         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 159                 return 0;
 160
 161         if (igt_spinner_init(&spin, gt))
 162                 return -ENOMEM;
 163
 164         for_each_engine(engine, gt, id) {
 165                 struct intel_context *ce;
 166                 struct i915_request *rq;
 167
 168                 ce = intel_context_create(engine);
 169                 if (IS_ERR(ce)) {
 170                         err = PTR_ERR(ce);
 171                         break;
 172                 }
 173
 174                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 175                 if (IS_ERR(rq)) {
 176                         err = PTR_ERR(rq);
 177                         goto out_ctx;
 178                 }
 179
 180                 i915_request_add(rq);
 181                 if (!igt_wait_for_spinner(&spin, rq)) {
 182                         GEM_TRACE("spinner failed to start\n");
 183                         GEM_TRACE_DUMP();
 184                         intel_gt_set_wedged(gt);
 185                         err = -EIO;
 186                         goto out_ctx;
 187                 }
 188
 189                 igt_spinner_end(&spin);
 190                 if (igt_flush_test(gt->i915)) {
 191                         err = -EIO;
 192                         goto out_ctx;
 193                 }
 194
 195 out_ctx:
 196                 intel_context_put(ce);
 197                 if (err)
 198                         break;
 199         }
 200
 201         igt_spinner_fini(&spin);
 202         return err;
 203 }
 204
 205 static int live_unlite_restore(struct intel_gt *gt, int prio)
 206 {
 207         struct intel_engine_cs *engine;
 208         enum intel_engine_id id;
 209         struct igt_spinner spin;
 210         int err = -ENOMEM;
 211
 212         /*
 213          * Check that we can correctly context switch between 2 instances
 214          * on the same engine from the same parent context.
 215          */
 216
 217         if (igt_spinner_init(&spin, gt))
 218                 return err;
 219
 220         err = 0;
 221         for_each_engine(engine, gt, id) {
 222                 struct intel_context *ce[2] = {};
 223                 struct i915_request *rq[2];
 224                 struct igt_live_test t;
 225                 int n;
 226
 227                 if (prio && !intel_engine_has_preemption(engine))
 228                         continue;
 229
 230                 if (!intel_engine_can_store_dword(engine))
 231                         continue;
 232
 233                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 234                         err = -EIO;
 235                         break;
 236                 }
 237                 engine_heartbeat_disable(engine);
 238
 239                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 240                         struct intel_context *tmp;
 241
 242                         tmp = intel_context_create(engine);
 243                         if (IS_ERR(tmp)) {
 244                                 err = PTR_ERR(tmp);
 245                                 goto err_ce;
 246                         }
 247
 248                         err = intel_context_pin(tmp);
 249                         if (err) {
 250                                 intel_context_put(tmp);
 251                                 goto err_ce;
 252                         }
 253
 254                         /*
 255                          * Setup the pair of contexts such that if we
 256                          * lite-restore using the RING_TAIL from ce[1] it
 257                          * will execute garbage from ce[0]->ring.
 258                          */
 259                         memset(tmp->ring->vaddr,
 260                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
 261                                tmp->ring->vma->size);
 262
 263                         ce[n] = tmp;
 264                 }
 265                 GEM_BUG_ON(!ce[1]->ring->size);
 266                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
 267                 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
 268
 269                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 270                 if (IS_ERR(rq[0])) {
 271                         err = PTR_ERR(rq[0]);
 272                         goto err_ce;
 273                 }
 274
 275                 i915_request_get(rq[0]);
 276                 i915_request_add(rq[0]);
 277                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
 278
 279                 if (!igt_wait_for_spinner(&spin, rq[0])) {
 280                         i915_request_put(rq[0]);
 281                         goto err_ce;
 282                 }
 283
 284                 rq[1] = i915_request_create(ce[1]);
 285                 if (IS_ERR(rq[1])) {
 286                         err = PTR_ERR(rq[1]);
 287                         i915_request_put(rq[0]);
 288                         goto err_ce;
 289                 }
 290
 291                 if (!prio) {
 292                         /*
 293                          * Ensure we do the switch to ce[1] on completion.
 294                          *
 295                          * rq[0] is already submitted, so this should reduce
 296                          * to a no-op (a wait on a request on the same engine
 297                          * uses the submit fence, not the completion fence),
 298                          * but it will install a dependency on rq[1] for rq[0]
 299                          * that will prevent the pair being reordered by
 300                          * timeslicing.
 301                          */
 302                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 303                 }
 304
 305                 i915_request_get(rq[1]);
 306                 i915_request_add(rq[1]);
 307                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
 308                 i915_request_put(rq[0]);
 309
 310                 if (prio) {
 311                         struct i915_sched_attr attr = {
 312                                 .priority = prio,
 313                         };
 314
 315                         /* Alternatively preempt the spinner with ce[1] */
 316                         engine->schedule(rq[1], &attr);
 317                 }
 318
 319                 /* And switch back to ce[0] for good measure */
 320                 rq[0] = i915_request_create(ce[0]);
 321                 if (IS_ERR(rq[0])) {
 322                         err = PTR_ERR(rq[0]);
 323                         i915_request_put(rq[1]);
 324                         goto err_ce;
 325                 }
 326
 327                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
 328                 i915_request_get(rq[0]);
 329                 i915_request_add(rq[0]);
 330                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
 331                 i915_request_put(rq[1]);
 332                 i915_request_put(rq[0]);
 333
 334 err_ce:
 335                 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
 336                 igt_spinner_end(&spin);
 337                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
 338                         if (IS_ERR_OR_NULL(ce[n]))
 339                                 break;
 340
 341                         intel_context_unpin(ce[n]);
 342                         intel_context_put(ce[n]);
 343                 }
 344
 345                 engine_heartbeat_enable(engine);
 346                 if (igt_live_test_end(&t))
 347                         err = -EIO;
 348                 if (err)
 349                         break;
 350         }
 351
 352         igt_spinner_fini(&spin);
 353         return err;
 354 }
 355
 356 static int live_unlite_switch(void *arg)
 357 {
 358         return live_unlite_restore(arg, 0);
 359 }
 360
 361 static int live_unlite_preempt(void *arg)
 362 {
 363         return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 364 }
 365
 366 static int live_pin_rewind(void *arg)
 367 {
 368         struct intel_gt *gt = arg;
 369         struct intel_engine_cs *engine;
 370         enum intel_engine_id id;
 371         int err = 0;
 372
 373         /*
 374          * We have to be careful not to trust intel_ring too much, for example
 375          * ring->head is updated upon retire which is out of sync with pinning
 376          * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
 377          * or else we risk writing an older, stale value.
 378          *
 379          * To simulate this, let's apply a bit of deliberate sabotague.
 380          */
 381
 382         for_each_engine(engine, gt, id) {
 383                 struct intel_context *ce;
 384                 struct i915_request *rq;
 385                 struct intel_ring *ring;
 386                 struct igt_live_test t;
 387
 388                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 389                         err = -EIO;
 390                         break;
 391                 }
 392
 393                 ce = intel_context_create(engine);
 394                 if (IS_ERR(ce)) {
 395                         err = PTR_ERR(ce);
 396                         break;
 397                 }
 398
 399                 err = intel_context_pin(ce);
 400                 if (err) {
 401                         intel_context_put(ce);
 402                         break;
 403                 }
 404
 405                 /* Keep the context awake while we play games */
 406                 err = i915_active_acquire(&ce->active);
 407                 if (err) {
 408                         intel_context_unpin(ce);
 409                         intel_context_put(ce);
 410                         break;
 411                 }
 412                 ring = ce->ring;
 413
 414                 /* Poison the ring, and offset the next request from HEAD */
 415                 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
 416                 ring->emit = ring->size / 2;
 417                 ring->tail = ring->emit;
 418                 GEM_BUG_ON(ring->head);
 419
 420                 intel_context_unpin(ce);
 421
 422                 /* Submit a simple nop request */
 423                 GEM_BUG_ON(intel_context_is_pinned(ce));
 424                 rq = intel_context_create_request(ce);
 425                 i915_active_release(&ce->active); /* e.g. async retire */
 426                 intel_context_put(ce);
 427                 if (IS_ERR(rq)) {
 428                         err = PTR_ERR(rq);
 429                         break;
 430                 }
 431                 GEM_BUG_ON(!rq->head);
 432                 i915_request_add(rq);
 433
 434                 /* Expect not to hang! */
 435                 if (igt_live_test_end(&t)) {
 436                         err = -EIO;
 437                         break;
 438                 }
 439         }
 440
 441         return err;
 442 }
 443
 444 static int live_hold_reset(void *arg)
 445 {
 446         struct intel_gt *gt = arg;
 447         struct intel_engine_cs *engine;
 448         enum intel_engine_id id;
 449         struct igt_spinner spin;
 450         int err = 0;
 451
 452         /*
 453          * In order to support offline error capture for fast preempt reset,
 454          * we need to decouple the guilty request and ensure that it and its
 455          * descendents are not executed while the capture is in progress.
 456          */
 457
 458         if (!intel_has_reset_engine(gt))
 459                 return 0;
 460
 461         if (igt_spinner_init(&spin, gt))
 462                 return -ENOMEM;
 463
 464         for_each_engine(engine, gt, id) {
 465                 struct intel_context *ce;
 466                 struct i915_request *rq;
 467
 468                 ce = intel_context_create(engine);
 469                 if (IS_ERR(ce)) {
 470                         err = PTR_ERR(ce);
 471                         break;
 472                 }
 473
 474                 engine_heartbeat_disable(engine);
 475
 476                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 477                 if (IS_ERR(rq)) {
 478                         err = PTR_ERR(rq);
 479                         goto out;
 480                 }
 481                 i915_request_add(rq);
 482
 483                 if (!igt_wait_for_spinner(&spin, rq)) {
 484                         intel_gt_set_wedged(gt);
 485                         err = -ETIME;
 486                         goto out;
 487                 }
 488
 489                 /* We have our request executing, now remove it and reset */
 490
 491                 if (test_and_set_bit(I915_RESET_ENGINE + id,
 492                                      &gt->reset.flags)) {
 493                         intel_gt_set_wedged(gt);
 494                         err = -EBUSY;
 495                         goto out;
 496                 }
 497                 tasklet_disable(&engine->execlists.tasklet);
 498
 499                 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
 500                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 501
 502                 i915_request_get(rq);
 503                 execlists_hold(engine, rq);
 504                 GEM_BUG_ON(!i915_request_on_hold(rq));
 505
 506                 intel_engine_reset(engine, NULL);
 507                 GEM_BUG_ON(rq->fence.error != -EIO);
 508
 509                 tasklet_enable(&engine->execlists.tasklet);
 510                 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
 511                                       &gt->reset.flags);
 512
 513                 /* Check that we do not resubmit the held request */
 514                 if (!i915_request_wait(rq, 0, HZ / 5)) {
 515                         pr_err("%s: on hold request completed!\n",
 516                                engine->name);
 517                         i915_request_put(rq);
 518                         err = -EIO;
 519                         goto out;
 520                 }
 521                 GEM_BUG_ON(!i915_request_on_hold(rq));
 522
 523                 /* But is resubmitted on release */
 524                 execlists_unhold(engine, rq);
 525                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 526                         pr_err("%s: held request did not complete!\n",
 527                                engine->name);
 528                         intel_gt_set_wedged(gt);
 529                         err = -ETIME;
 530                 }
 531                 i915_request_put(rq);
 532
 533 out:
 534                 engine_heartbeat_enable(engine);
 535                 intel_context_put(ce);
 536                 if (err)
 537                         break;
 538         }
 539
 540         igt_spinner_fini(&spin);
 541         return err;
 542 }
 543
 544 static const char *error_repr(int err)
 545 {
 546         return err ? "bad" : "good";
 547 }
 548
 549 static int live_error_interrupt(void *arg)
 550 {
 551         static const struct error_phase {
 552                 enum { GOOD = 0, BAD = -EIO } error[2];
 553         } phases[] = {
 554                 { { BAD,  GOOD } },
 555                 { { BAD,  BAD  } },
 556                 { { BAD,  GOOD } },
 557                 { { GOOD, GOOD } }, /* sentinel */
 558         };
 559         struct intel_gt *gt = arg;
 560         struct intel_engine_cs *engine;
 561         enum intel_engine_id id;
 562
 563         /*
 564          * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
 565          * of invalid commands in user batches that will cause a GPU hang.
 566          * This is a faster mechanism than using hangcheck/heartbeats, but
 567          * only detects problems the HW knows about -- it will not warn when
 568          * we kill the HW!
 569          *
 570          * To verify our detection and reset, we throw some invalid commands
 571          * at the HW and wait for the interrupt.
 572          */
 573
 574         if (!intel_has_reset_engine(gt))
 575                 return 0;
 576
 577         for_each_engine(engine, gt, id) {
 578                 const struct error_phase *p;
 579                 int err = 0;
 580
 581                 engine_heartbeat_disable(engine);
 582
 583                 for (p = phases; p->error[0] != GOOD; p++) {
 584                         struct i915_request *client[ARRAY_SIZE(phases->error)];
 585                         u32 *cs;
 586                         int i;
 587
 588                         memset(client, 0, sizeof(*client));
 589                         for (i = 0; i < ARRAY_SIZE(client); i++) {
 590                                 struct intel_context *ce;
 591                                 struct i915_request *rq;
 592
 593                                 ce = intel_context_create(engine);
 594                                 if (IS_ERR(ce)) {
 595                                         err = PTR_ERR(ce);
 596                                         goto out;
 597                                 }
 598
 599                                 rq = intel_context_create_request(ce);
 600                                 intel_context_put(ce);
 601                                 if (IS_ERR(rq)) {
 602                                         err = PTR_ERR(rq);
 603                                         goto out;
 604                                 }
 605
 606                                 if (rq->engine->emit_init_breadcrumb) {
 607                                         err = rq->engine->emit_init_breadcrumb(rq);
 608                                         if (err) {
 609                                                 i915_request_add(rq);
 610                                                 goto out;
 611                                         }
 612                                 }
 613
 614                                 cs = intel_ring_begin(rq, 2);
 615                                 if (IS_ERR(cs)) {
 616                                         i915_request_add(rq);
 617                                         err = PTR_ERR(cs);
 618                                         goto out;
 619                                 }
 620
 621                                 if (p->error[i]) {
 622                                         *cs++ = 0xdeadbeef;
 623                                         *cs++ = 0xdeadbeef;
 624                                 } else {
 625                                         *cs++ = MI_NOOP;
 626                                         *cs++ = MI_NOOP;
 627                                 }
 628
 629                                 client[i] = i915_request_get(rq);
 630                                 i915_request_add(rq);
 631                         }
 632
 633                         err = wait_for_submit(engine, client[0], HZ / 2);
 634                         if (err) {
 635                                 pr_err("%s: first request did not start within time!\n",
 636                                        engine->name);
 637                                 err = -ETIME;
 638                                 goto out;
 639                         }
 640
 641                         for (i = 0; i < ARRAY_SIZE(client); i++) {
 642                                 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
 643                                         pr_debug("%s: %s request incomplete!\n",
 644                                                  engine->name,
 645                                                  error_repr(p->error[i]));
 646
 647                                 if (!i915_request_started(client[i])) {
 648                                         pr_err("%s: %s request not started!\n",
 649                                                engine->name,
 650                                                error_repr(p->error[i]));
 651                                         err = -ETIME;
 652                                         goto out;
 653                                 }
 654
 655                                 /* Kick the tasklet to process the error */
 656                                 intel_engine_flush_submission(engine);
 657                                 if (client[i]->fence.error != p->error[i]) {
 658                                         pr_err("%s: %s request (%s) with wrong error code: %d\n",
 659                                                engine->name,
 660                                                error_repr(p->error[i]),
 661                                                i915_request_completed(client[i]) ? "completed" : "running",
 662                                                client[i]->fence.error);
 663                                         err = -EINVAL;
 664                                         goto out;
 665                                 }
 666                         }
 667
 668 out:
 669                         for (i = 0; i < ARRAY_SIZE(client); i++)
 670                                 if (client[i])
 671                                         i915_request_put(client[i]);
 672                         if (err) {
 673                                 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
 674                                        engine->name, p - phases,
 675                                        p->error[0], p->error[1]);
 676                                 break;
 677                         }
 678                 }
 679
 680                 engine_heartbeat_enable(engine);
 681                 if (err) {
 682                         intel_gt_set_wedged(gt);
 683                         return err;
 684                 }
 685         }
 686
 687         return 0;
 688 }
 689
 690 static int
 691 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 692 {
 693         u32 *cs;
 694
 695         cs = intel_ring_begin(rq, 10);
 696         if (IS_ERR(cs))
 697                 return PTR_ERR(cs);
 698
 699         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 700
 701         *cs++ = MI_SEMAPHORE_WAIT |
 702                 MI_SEMAPHORE_GLOBAL_GTT |
 703                 MI_SEMAPHORE_POLL |
 704                 MI_SEMAPHORE_SAD_NEQ_SDD;
 705         *cs++ = 0;
 706         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
 707         *cs++ = 0;
 708
 709         if (idx > 0) {
 710                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 711                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 712                 *cs++ = 0;
 713                 *cs++ = 1;
 714         } else {
 715                 *cs++ = MI_NOOP;
 716                 *cs++ = MI_NOOP;
 717                 *cs++ = MI_NOOP;
 718                 *cs++ = MI_NOOP;
 719         }
 720
 721         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 722
 723         intel_ring_advance(rq, cs);
 724         return 0;
 725 }
 726
 727 static struct i915_request *
 728 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 729 {
 730         struct intel_context *ce;
 731         struct i915_request *rq;
 732         int err;
 733
 734         ce = intel_context_create(engine);
 735         if (IS_ERR(ce))
 736                 return ERR_CAST(ce);
 737
 738         rq = intel_context_create_request(ce);
 739         if (IS_ERR(rq))
 740                 goto out_ce;
 741
 742         err = 0;
 743         if (rq->engine->emit_init_breadcrumb)
 744                 err = rq->engine->emit_init_breadcrumb(rq);
 745         if (err == 0)
 746                 err = emit_semaphore_chain(rq, vma, idx);
 747         if (err == 0)
 748                 i915_request_get(rq);
 749         i915_request_add(rq);
 750         if (err)
 751                 rq = ERR_PTR(err);
 752
 753 out_ce:
 754         intel_context_put(ce);
 755         return rq;
 756 }
 757
 758 static int
 759 release_queue(struct intel_engine_cs *engine,
 760               struct i915_vma *vma,
 761               int idx, int prio)
 762 {
 763         struct i915_sched_attr attr = {
 764                 .priority = prio,
 765         };
 766         struct i915_request *rq;
 767         u32 *cs;
 768
 769         rq = intel_engine_create_kernel_request(engine);
 770         if (IS_ERR(rq))
 771                 return PTR_ERR(rq);
 772
 773         cs = intel_ring_begin(rq, 4);
 774         if (IS_ERR(cs)) {
 775                 i915_request_add(rq);
 776                 return PTR_ERR(cs);
 777         }
 778
 779         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 780         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 781         *cs++ = 0;
 782         *cs++ = 1;
 783
 784         intel_ring_advance(rq, cs);
 785
 786         i915_request_get(rq);
 787         i915_request_add(rq);
 788
 789         local_bh_disable();
 790         engine->schedule(rq, &attr);
 791         local_bh_enable(); /* kick tasklet */
 792
 793         i915_request_put(rq);
 794
 795         return 0;
 796 }
 797
 798 static int
 799 slice_semaphore_queue(struct intel_engine_cs *outer,
 800                       struct i915_vma *vma,
 801                       int count)
 802 {
 803         struct intel_engine_cs *engine;
 804         struct i915_request *head;
 805         enum intel_engine_id id;
 806         int err, i, n = 0;
 807
 808         head = semaphore_queue(outer, vma, n++);
 809         if (IS_ERR(head))
 810                 return PTR_ERR(head);
 811
 812         for_each_engine(engine, outer->gt, id) {
 813                 for (i = 0; i < count; i++) {
 814                         struct i915_request *rq;
 815
 816                         rq = semaphore_queue(engine, vma, n++);
 817                         if (IS_ERR(rq)) {
 818                                 err = PTR_ERR(rq);
 819                                 goto out;
 820                         }
 821
 822                         i915_request_put(rq);
 823                 }
 824         }
 825
 826         err = release_queue(outer, vma, n, INT_MAX);
 827         if (err)
 828                 goto out;
 829
 830         if (i915_request_wait(head, 0,
 831                               2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 832                 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 833                        count, n);
 834                 GEM_TRACE_DUMP();
 835                 intel_gt_set_wedged(outer->gt);
 836                 err = -EIO;
 837         }
 838
 839 out:
 840         i915_request_put(head);
 841         return err;
 842 }
 843
 844 static int live_timeslice_preempt(void *arg)
 845 {
 846         struct intel_gt *gt = arg;
 847         struct drm_i915_gem_object *obj;
 848         struct i915_vma *vma;
 849         void *vaddr;
 850         int err = 0;
 851         int count;
 852
 853         /*
 854          * If a request takes too long, we would like to give other users
 855          * a fair go on the GPU. In particular, users may create batches
 856          * that wait upon external input, where that input may even be
 857          * supplied by another GPU job. To avoid blocking forever, we
 858          * need to preempt the current task and replace it with another
 859          * ready task.
 860          */
 861         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 862                 return 0;
 863
 864         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 865         if (IS_ERR(obj))
 866                 return PTR_ERR(obj);
 867
 868         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
 869         if (IS_ERR(vma)) {
 870                 err = PTR_ERR(vma);
 871                 goto err_obj;
 872         }
 873
 874         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
 875         if (IS_ERR(vaddr)) {
 876                 err = PTR_ERR(vaddr);
 877                 goto err_obj;
 878         }
 879
 880         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
 881         if (err)
 882                 goto err_map;
 883
 884         err = i915_vma_sync(vma);
 885         if (err)
 886                 goto err_pin;
 887
 888         for_each_prime_number_from(count, 1, 16) {
 889                 struct intel_engine_cs *engine;
 890                 enum intel_engine_id id;
 891
 892                 for_each_engine(engine, gt, id) {
 893                         if (!intel_engine_has_preemption(engine))
 894                                 continue;
 895
 896                         memset(vaddr, 0, PAGE_SIZE);
 897
 898                         engine_heartbeat_disable(engine);
 899                         err = slice_semaphore_queue(engine, vma, count);
 900                         engine_heartbeat_enable(engine);
 901                         if (err)
 902                                 goto err_pin;
 903
 904                         if (igt_flush_test(gt->i915)) {
 905                                 err = -EIO;
 906                                 goto err_pin;
 907                         }
 908                 }
 909         }
 910
 911 err_pin:
 912         i915_vma_unpin(vma);
 913 err_map:
 914         i915_gem_object_unpin_map(obj);
 915 err_obj:
 916         i915_gem_object_put(obj);
 917         return err;
 918 }
 919
 920 static struct i915_request *
 921 create_rewinder(struct intel_context *ce,
 922                 struct i915_request *wait,
 923                 void *slot, int idx)
 924 {
 925         const u32 offset =
 926                 i915_ggtt_offset(ce->engine->status_page.vma) +
 927                 offset_in_page(slot);
 928         struct i915_request *rq;
 929         u32 *cs;
 930         int err;
 931
 932         rq = intel_context_create_request(ce);
 933         if (IS_ERR(rq))
 934                 return rq;
 935
 936         if (wait) {
 937                 err = i915_request_await_dma_fence(rq, &wait->fence);
 938                 if (err)
 939                         goto err;
 940         }
 941
 942         cs = intel_ring_begin(rq, 14);
 943         if (IS_ERR(cs)) {
 944                 err = PTR_ERR(cs);
 945                 goto err;
 946         }
 947
 948         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 949         *cs++ = MI_NOOP;
 950
 951         *cs++ = MI_SEMAPHORE_WAIT |
 952                 MI_SEMAPHORE_GLOBAL_GTT |
 953                 MI_SEMAPHORE_POLL |
 954                 MI_SEMAPHORE_SAD_GTE_SDD;
 955         *cs++ = idx;
 956         *cs++ = offset;
 957         *cs++ = 0;
 958
 959         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
 960         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
 961         *cs++ = offset + idx * sizeof(u32);
 962         *cs++ = 0;
 963
 964         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 965         *cs++ = offset;
 966         *cs++ = 0;
 967         *cs++ = idx + 1;
 968
 969         intel_ring_advance(rq, cs);
 970
 971         rq->sched.attr.priority = I915_PRIORITY_MASK;
 972         err = 0;
 973 err:
 974         i915_request_get(rq);
 975         i915_request_add(rq);
 976         if (err) {
 977                 i915_request_put(rq);
 978                 return ERR_PTR(err);
 979         }
 980
 981         return rq;
 982 }
 983
 984 static int live_timeslice_rewind(void *arg)
 985 {
 986         struct intel_gt *gt = arg;
 987         struct intel_engine_cs *engine;
 988         enum intel_engine_id id;
 989
 990         /*
 991          * The usual presumption on timeslice expiration is that we replace
 992          * the active context with another. However, given a chain of
 993          * dependencies we may end up with replacing the context with itself,
 994          * but only a few of those requests, forcing us to rewind the
 995          * RING_TAIL of the original request.
 996          */
 997         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 998                 return 0;
 999
1000         for_each_engine(engine, gt, id) {
1001                 enum { A1, A2, B1 };
1002                 enum { X = 1, Z, Y };
1003                 struct i915_request *rq[3] = {};
1004                 struct intel_context *ce;
1005                 unsigned long timeslice;
1006                 int i, err = 0;
1007                 u32 *slot;
1008
1009                 if (!intel_engine_has_timeslices(engine))
1010                         continue;
1011
1012                 /*
1013                  * A:rq1 -- semaphore wait, timestamp X
1014                  * A:rq2 -- write timestamp Y
1015                  *
1016                  * B:rq1 [await A:rq1] -- write timestamp Z
1017                  *
1018                  * Force timeslice, release semaphore.
1019                  *
1020                  * Expect execution/evaluation order XZY
1021                  */
1022
1023                 engine_heartbeat_disable(engine);
1024                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1025
1026                 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1027
1028                 ce = intel_context_create(engine);
1029                 if (IS_ERR(ce)) {
1030                         err = PTR_ERR(ce);
1031                         goto err;
1032                 }
1033
1034                 rq[0] = create_rewinder(ce, NULL, slot, X);
1035                 if (IS_ERR(rq[0])) {
1036                         intel_context_put(ce);
1037                         goto err;
1038                 }
1039
1040                 rq[1] = create_rewinder(ce, NULL, slot, Y);
1041                 intel_context_put(ce);
1042                 if (IS_ERR(rq[1]))
1043                         goto err;
1044
1045                 err = wait_for_submit(engine, rq[1], HZ / 2);
1046                 if (err) {
1047                         pr_err("%s: failed to submit first context\n",
1048                                engine->name);
1049                         goto err;
1050                 }
1051
1052                 ce = intel_context_create(engine);
1053                 if (IS_ERR(ce)) {
1054                         err = PTR_ERR(ce);
1055                         goto err;
1056                 }
1057
1058                 rq[2] = create_rewinder(ce, rq[0], slot, Z);
1059                 intel_context_put(ce);
1060                 if (IS_ERR(rq[2]))
1061                         goto err;
1062
1063                 err = wait_for_submit(engine, rq[2], HZ / 2);
1064                 if (err) {
1065                         pr_err("%s: failed to submit second context\n",
1066                                engine->name);
1067                         goto err;
1068                 }
1069
1070                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1071                 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1072                         /* Wait for the timeslice to kick in */
1073                         del_timer(&engine->execlists.timer);
1074                         tasklet_hi_schedule(&engine->execlists.tasklet);
1075                         intel_engine_flush_submission(engine);
1076                 }
1077                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1078                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1079                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1080                 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1081
1082                 /* Release the hounds! */
1083                 slot[0] = 1;
1084                 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1085
1086                 for (i = 1; i <= 3; i++) {
1087                         unsigned long timeout = jiffies + HZ / 2;
1088
1089                         while (!READ_ONCE(slot[i]) &&
1090                                time_before(jiffies, timeout))
1091                                 ;
1092
1093                         if (!time_before(jiffies, timeout)) {
1094                                 pr_err("%s: rq[%d] timed out\n",
1095                                        engine->name, i - 1);
1096                                 err = -ETIME;
1097                                 goto err;
1098                         }
1099
1100                         pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1101                 }
1102
1103                 /* XZY: XZ < XY */
1104                 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1105                         pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1106                                engine->name,
1107                                slot[Z] - slot[X],
1108                                slot[Y] - slot[X]);
1109                         err = -EINVAL;
1110                 }
1111
1112 err:
1113                 memset32(&slot[0], -1, 4);
1114                 wmb();
1115
1116                 engine->props.timeslice_duration_ms = timeslice;
1117                 engine_heartbeat_enable(engine);
1118                 for (i = 0; i < 3; i++)
1119                         i915_request_put(rq[i]);
1120                 if (igt_flush_test(gt->i915))
1121                         err = -EIO;
1122                 if (err)
1123                         return err;
1124         }
1125
1126         return 0;
1127 }
1128
1129 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1130 {
1131         struct i915_request *rq;
1132
1133         rq = intel_engine_create_kernel_request(engine);
1134         if (IS_ERR(rq))
1135                 return rq;
1136
1137         i915_request_get(rq);
1138         i915_request_add(rq);
1139
1140         return rq;
1141 }
1142
1143 static long timeslice_threshold(const struct intel_engine_cs *engine)
1144 {
1145         return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1146 }
1147
1148 static int live_timeslice_queue(void *arg)
1149 {
1150         struct intel_gt *gt = arg;
1151         struct drm_i915_gem_object *obj;
1152         struct intel_engine_cs *engine;
1153         enum intel_engine_id id;
1154         struct i915_vma *vma;
1155         void *vaddr;
1156         int err = 0;
1157
1158         /*
1159          * Make sure that even if ELSP[0] and ELSP[1] are filled with
1160          * timeslicing between them disabled, we *do* enable timeslicing
1161          * if the queue demands it. (Normally, we do not submit if
1162          * ELSP[1] is already occupied, so must rely on timeslicing to
1163          * eject ELSP[0] in favour of the queue.)
1164          */
1165         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1166                 return 0;
1167
1168         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1169         if (IS_ERR(obj))
1170                 return PTR_ERR(obj);
1171
1172         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1173         if (IS_ERR(vma)) {
1174                 err = PTR_ERR(vma);
1175                 goto err_obj;
1176         }
1177
1178         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1179         if (IS_ERR(vaddr)) {
1180                 err = PTR_ERR(vaddr);
1181                 goto err_obj;
1182         }
1183
1184         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1185         if (err)
1186                 goto err_map;
1187
1188         err = i915_vma_sync(vma);
1189         if (err)
1190                 goto err_pin;
1191
1192         for_each_engine(engine, gt, id) {
1193                 struct i915_sched_attr attr = {
1194                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1195                 };
1196                 struct i915_request *rq, *nop;
1197
1198                 if (!intel_engine_has_preemption(engine))
1199                         continue;
1200
1201                 engine_heartbeat_disable(engine);
1202                 memset(vaddr, 0, PAGE_SIZE);
1203
1204                 /* ELSP[0]: semaphore wait */
1205                 rq = semaphore_queue(engine, vma, 0);
1206                 if (IS_ERR(rq)) {
1207                         err = PTR_ERR(rq);
1208                         goto err_heartbeat;
1209                 }
1210                 engine->schedule(rq, &attr);
1211                 err = wait_for_submit(engine, rq, HZ / 2);
1212                 if (err) {
1213                         pr_err("%s: Timed out trying to submit semaphores\n",
1214                                engine->name);
1215                         goto err_rq;
1216                 }
1217
1218                 /* ELSP[1]: nop request */
1219                 nop = nop_request(engine);
1220                 if (IS_ERR(nop)) {
1221                         err = PTR_ERR(nop);
1222                         goto err_rq;
1223                 }
1224                 err = wait_for_submit(engine, nop, HZ / 2);
1225                 i915_request_put(nop);
1226                 if (err) {
1227                         pr_err("%s: Timed out trying to submit nop\n",
1228                                engine->name);
1229                         goto err_rq;
1230                 }
1231
1232                 GEM_BUG_ON(i915_request_completed(rq));
1233                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1234
1235                 /* Queue: semaphore signal, matching priority as semaphore */
1236                 err = release_queue(engine, vma, 1, effective_prio(rq));
1237                 if (err)
1238                         goto err_rq;
1239
1240                 /* Wait until we ack the release_queue and start timeslicing */
1241                 do {
1242                         cond_resched();
1243                         intel_engine_flush_submission(engine);
1244                 } while (READ_ONCE(engine->execlists.pending[0]));
1245
1246                 if (!READ_ONCE(engine->execlists.timer.expires) &&
1247                     execlists_active(&engine->execlists) == rq &&
1248                     !i915_request_completed(rq)) {
1249                         struct drm_printer p =
1250                                 drm_info_printer(gt->i915->drm.dev);
1251
1252                         GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1253                                       engine->name);
1254                         intel_engine_dump(engine, &p,
1255                                           "%s\n", engine->name);
1256                         GEM_TRACE_DUMP();
1257
1258                         memset(vaddr, 0xff, PAGE_SIZE);
1259                         err = -EINVAL;
1260                 }
1261
1262                 /* Timeslice every jiffy, so within 2 we should signal */
1263                 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1264                         struct drm_printer p =
1265                                 drm_info_printer(gt->i915->drm.dev);
1266
1267                         pr_err("%s: Failed to timeslice into queue\n",
1268                                engine->name);
1269                         intel_engine_dump(engine, &p,
1270                                           "%s\n", engine->name);
1271
1272                         memset(vaddr, 0xff, PAGE_SIZE);
1273                         err = -EIO;
1274                 }
1275 err_rq:
1276                 i915_request_put(rq);
1277 err_heartbeat:
1278                 engine_heartbeat_enable(engine);
1279                 if (err)
1280                         break;
1281         }
1282
1283 err_pin:
1284         i915_vma_unpin(vma);
1285 err_map:
1286         i915_gem_object_unpin_map(obj);
1287 err_obj:
1288         i915_gem_object_put(obj);
1289         return err;
1290 }
1291
1292 static int live_busywait_preempt(void *arg)
1293 {
1294         struct intel_gt *gt = arg;
1295         struct i915_gem_context *ctx_hi, *ctx_lo;
1296         struct intel_engine_cs *engine;
1297         struct drm_i915_gem_object *obj;
1298         struct i915_vma *vma;
1299         enum intel_engine_id id;
1300         int err = -ENOMEM;
1301         u32 *map;
1302
1303         /*
1304          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1305          * preempt the busywaits used to synchronise between rings.
1306          */
1307
1308         ctx_hi = kernel_context(gt->i915);
1309         if (!ctx_hi)
1310                 return -ENOMEM;
1311         ctx_hi->sched.priority =
1312                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1313
1314         ctx_lo = kernel_context(gt->i915);
1315         if (!ctx_lo)
1316                 goto err_ctx_hi;
1317         ctx_lo->sched.priority =
1318                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1319
1320         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1321         if (IS_ERR(obj)) {
1322                 err = PTR_ERR(obj);
1323                 goto err_ctx_lo;
1324         }
1325
1326         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1327         if (IS_ERR(map)) {
1328                 err = PTR_ERR(map);
1329                 goto err_obj;
1330         }
1331
1332         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1333         if (IS_ERR(vma)) {
1334                 err = PTR_ERR(vma);
1335                 goto err_map;
1336         }
1337
1338         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1339         if (err)
1340                 goto err_map;
1341
1342         err = i915_vma_sync(vma);
1343         if (err)
1344                 goto err_vma;
1345
1346         for_each_engine(engine, gt, id) {
1347                 struct i915_request *lo, *hi;
1348                 struct igt_live_test t;
1349                 u32 *cs;
1350
1351                 if (!intel_engine_has_preemption(engine))
1352                         continue;
1353
1354                 if (!intel_engine_can_store_dword(engine))
1355                         continue;
1356
1357                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1358                         err = -EIO;
1359                         goto err_vma;
1360                 }
1361
1362                 /*
1363                  * We create two requests. The low priority request
1364                  * busywaits on a semaphore (inside the ringbuffer where
1365                  * is should be preemptible) and the high priority requests
1366                  * uses a MI_STORE_DWORD_IMM to update the semaphore value
1367                  * allowing the first request to complete. If preemption
1368                  * fails, we hang instead.
1369                  */
1370
1371                 lo = igt_request_alloc(ctx_lo, engine);
1372                 if (IS_ERR(lo)) {
1373                         err = PTR_ERR(lo);
1374                         goto err_vma;
1375                 }
1376
1377                 cs = intel_ring_begin(lo, 8);
1378                 if (IS_ERR(cs)) {
1379                         err = PTR_ERR(cs);
1380                         i915_request_add(lo);
1381                         goto err_vma;
1382                 }
1383
1384                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1385                 *cs++ = i915_ggtt_offset(vma);
1386                 *cs++ = 0;
1387                 *cs++ = 1;
1388
1389                 /* XXX Do we need a flush + invalidate here? */
1390
1391                 *cs++ = MI_SEMAPHORE_WAIT |
1392                         MI_SEMAPHORE_GLOBAL_GTT |
1393                         MI_SEMAPHORE_POLL |
1394                         MI_SEMAPHORE_SAD_EQ_SDD;
1395                 *cs++ = 0;
1396                 *cs++ = i915_ggtt_offset(vma);
1397                 *cs++ = 0;
1398
1399                 intel_ring_advance(lo, cs);
1400
1401                 i915_request_get(lo);
1402                 i915_request_add(lo);
1403
1404                 if (wait_for(READ_ONCE(*map), 10)) {
1405                         i915_request_put(lo);
1406                         err = -ETIMEDOUT;
1407                         goto err_vma;
1408                 }
1409
1410                 /* Low priority request should be busywaiting now */
1411                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1412                         i915_request_put(lo);
1413                         pr_err("%s: Busywaiting request did not!\n",
1414                                engine->name);
1415                         err = -EIO;
1416                         goto err_vma;
1417                 }
1418
1419                 hi = igt_request_alloc(ctx_hi, engine);
1420                 if (IS_ERR(hi)) {
1421                         err = PTR_ERR(hi);
1422                         i915_request_put(lo);
1423                         goto err_vma;
1424                 }
1425
1426                 cs = intel_ring_begin(hi, 4);
1427                 if (IS_ERR(cs)) {
1428                         err = PTR_ERR(cs);
1429                         i915_request_add(hi);
1430                         i915_request_put(lo);
1431                         goto err_vma;
1432                 }
1433
1434                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1435                 *cs++ = i915_ggtt_offset(vma);
1436                 *cs++ = 0;
1437                 *cs++ = 0;
1438
1439                 intel_ring_advance(hi, cs);
1440                 i915_request_add(hi);
1441
1442                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1443                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1444
1445                         pr_err("%s: Failed to preempt semaphore busywait!\n",
1446                                engine->name);
1447
1448                         intel_engine_dump(engine, &p, "%s\n", engine->name);
1449                         GEM_TRACE_DUMP();
1450
1451                         i915_request_put(lo);
1452                         intel_gt_set_wedged(gt);
1453                         err = -EIO;
1454                         goto err_vma;
1455                 }
1456                 GEM_BUG_ON(READ_ONCE(*map));
1457                 i915_request_put(lo);
1458
1459                 if (igt_live_test_end(&t)) {
1460                         err = -EIO;
1461                         goto err_vma;
1462                 }
1463         }
1464
1465         err = 0;
1466 err_vma:
1467         i915_vma_unpin(vma);
1468 err_map:
1469         i915_gem_object_unpin_map(obj);
1470 err_obj:
1471         i915_gem_object_put(obj);
1472 err_ctx_lo:
1473         kernel_context_close(ctx_lo);
1474 err_ctx_hi:
1475         kernel_context_close(ctx_hi);
1476         return err;
1477 }
1478
1479 static struct i915_request *
1480 spinner_create_request(struct igt_spinner *spin,
1481                        struct i915_gem_context *ctx,
1482                        struct intel_engine_cs *engine,
1483                        u32 arb)
1484 {
1485         struct intel_context *ce;
1486         struct i915_request *rq;
1487
1488         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1489         if (IS_ERR(ce))
1490                 return ERR_CAST(ce);
1491
1492         rq = igt_spinner_create_request(spin, ce, arb);
1493         intel_context_put(ce);
1494         return rq;
1495 }
1496
1497 static int live_preempt(void *arg)
1498 {
1499         struct intel_gt *gt = arg;
1500         struct i915_gem_context *ctx_hi, *ctx_lo;
1501         struct igt_spinner spin_hi, spin_lo;
1502         struct intel_engine_cs *engine;
1503         enum intel_engine_id id;
1504         int err = -ENOMEM;
1505
1506         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1507                 return 0;
1508
1509         if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1510                 pr_err("Logical preemption supported, but not exposed\n");
1511
1512         if (igt_spinner_init(&spin_hi, gt))
1513                 return -ENOMEM;
1514
1515         if (igt_spinner_init(&spin_lo, gt))
1516                 goto err_spin_hi;
1517
1518         ctx_hi = kernel_context(gt->i915);
1519         if (!ctx_hi)
1520                 goto err_spin_lo;
1521         ctx_hi->sched.priority =
1522                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1523
1524         ctx_lo = kernel_context(gt->i915);
1525         if (!ctx_lo)
1526                 goto err_ctx_hi;
1527         ctx_lo->sched.priority =
1528                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1529
1530         for_each_engine(engine, gt, id) {
1531                 struct igt_live_test t;
1532                 struct i915_request *rq;
1533
1534                 if (!intel_engine_has_preemption(engine))
1535                         continue;
1536
1537                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1538                         err = -EIO;
1539                         goto err_ctx_lo;
1540                 }
1541
1542                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1543                                             MI_ARB_CHECK);
1544                 if (IS_ERR(rq)) {
1545                         err = PTR_ERR(rq);
1546                         goto err_ctx_lo;
1547                 }
1548
1549                 i915_request_add(rq);
1550                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1551                         GEM_TRACE("lo spinner failed to start\n");
1552                         GEM_TRACE_DUMP();
1553                         intel_gt_set_wedged(gt);
1554                         err = -EIO;
1555                         goto err_ctx_lo;
1556                 }
1557
1558                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1559                                             MI_ARB_CHECK);
1560                 if (IS_ERR(rq)) {
1561                         igt_spinner_end(&spin_lo);
1562                         err = PTR_ERR(rq);
1563                         goto err_ctx_lo;
1564                 }
1565
1566                 i915_request_add(rq);
1567                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1568                         GEM_TRACE("hi spinner failed to start\n");
1569                         GEM_TRACE_DUMP();
1570                         intel_gt_set_wedged(gt);
1571                         err = -EIO;
1572                         goto err_ctx_lo;
1573                 }
1574
1575                 igt_spinner_end(&spin_hi);
1576                 igt_spinner_end(&spin_lo);
1577
1578                 if (igt_live_test_end(&t)) {
1579                         err = -EIO;
1580                         goto err_ctx_lo;
1581                 }
1582         }
1583
1584         err = 0;
1585 err_ctx_lo:
1586         kernel_context_close(ctx_lo);
1587 err_ctx_hi:
1588         kernel_context_close(ctx_hi);
1589 err_spin_lo:
1590         igt_spinner_fini(&spin_lo);
1591 err_spin_hi:
1592         igt_spinner_fini(&spin_hi);
1593         return err;
1594 }
1595
1596 static int live_late_preempt(void *arg)
1597 {
1598         struct intel_gt *gt = arg;
1599         struct i915_gem_context *ctx_hi, *ctx_lo;
1600         struct igt_spinner spin_hi, spin_lo;
1601         struct intel_engine_cs *engine;
1602         struct i915_sched_attr attr = {};
1603         enum intel_engine_id id;
1604         int err = -ENOMEM;
1605
1606         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1607                 return 0;
1608
1609         if (igt_spinner_init(&spin_hi, gt))
1610                 return -ENOMEM;
1611
1612         if (igt_spinner_init(&spin_lo, gt))
1613                 goto err_spin_hi;
1614
1615         ctx_hi = kernel_context(gt->i915);
1616         if (!ctx_hi)
1617                 goto err_spin_lo;
1618
1619         ctx_lo = kernel_context(gt->i915);
1620         if (!ctx_lo)
1621                 goto err_ctx_hi;
1622
1623         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1624         ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1625
1626         for_each_engine(engine, gt, id) {
1627                 struct igt_live_test t;
1628                 struct i915_request *rq;
1629
1630                 if (!intel_engine_has_preemption(engine))
1631                         continue;
1632
1633                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1634                         err = -EIO;
1635                         goto err_ctx_lo;
1636                 }
1637
1638                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1639                                             MI_ARB_CHECK);
1640                 if (IS_ERR(rq)) {
1641                         err = PTR_ERR(rq);
1642                         goto err_ctx_lo;
1643                 }
1644
1645                 i915_request_add(rq);
1646                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1647                         pr_err("First context failed to start\n");
1648                         goto err_wedged;
1649                 }
1650
1651                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1652                                             MI_NOOP);
1653                 if (IS_ERR(rq)) {
1654                         igt_spinner_end(&spin_lo);
1655                         err = PTR_ERR(rq);
1656                         goto err_ctx_lo;
1657                 }
1658
1659                 i915_request_add(rq);
1660                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1661                         pr_err("Second context overtook first?\n");
1662                         goto err_wedged;
1663                 }
1664
1665                 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1666                 engine->schedule(rq, &attr);
1667
1668                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1669                         pr_err("High priority context failed to preempt the low priority context\n");
1670                         GEM_TRACE_DUMP();
1671                         goto err_wedged;
1672                 }
1673
1674                 igt_spinner_end(&spin_hi);
1675                 igt_spinner_end(&spin_lo);
1676
1677                 if (igt_live_test_end(&t)) {
1678                         err = -EIO;
1679                         goto err_ctx_lo;
1680                 }
1681         }
1682
1683         err = 0;
1684 err_ctx_lo:
1685         kernel_context_close(ctx_lo);
1686 err_ctx_hi:
1687         kernel_context_close(ctx_hi);
1688 err_spin_lo:
1689         igt_spinner_fini(&spin_lo);
1690 err_spin_hi:
1691         igt_spinner_fini(&spin_hi);
1692         return err;
1693
1694 err_wedged:
1695         igt_spinner_end(&spin_hi);
1696         igt_spinner_end(&spin_lo);
1697         intel_gt_set_wedged(gt);
1698         err = -EIO;
1699         goto err_ctx_lo;
1700 }
1701
1702 struct preempt_client {
1703         struct igt_spinner spin;
1704         struct i915_gem_context *ctx;
1705 };
1706
1707 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1708 {
1709         c->ctx = kernel_context(gt->i915);
1710         if (!c->ctx)
1711                 return -ENOMEM;
1712
1713         if (igt_spinner_init(&c->spin, gt))
1714                 goto err_ctx;
1715
1716         return 0;
1717
1718 err_ctx:
1719         kernel_context_close(c->ctx);
1720         return -ENOMEM;
1721 }
1722
1723 static void preempt_client_fini(struct preempt_client *c)
1724 {
1725         igt_spinner_fini(&c->spin);
1726         kernel_context_close(c->ctx);
1727 }
1728
1729 static int live_nopreempt(void *arg)
1730 {
1731         struct intel_gt *gt = arg;
1732         struct intel_engine_cs *engine;
1733         struct preempt_client a, b;
1734         enum intel_engine_id id;
1735         int err = -ENOMEM;
1736
1737         /*
1738          * Verify that we can disable preemption for an individual request
1739          * that may be being observed and not want to be interrupted.
1740          */
1741
1742         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1743                 return 0;
1744
1745         if (preempt_client_init(gt, &a))
1746                 return -ENOMEM;
1747         if (preempt_client_init(gt, &b))
1748                 goto err_client_a;
1749         b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1750
1751         for_each_engine(engine, gt, id) {
1752                 struct i915_request *rq_a, *rq_b;
1753
1754                 if (!intel_engine_has_preemption(engine))
1755                         continue;
1756
1757                 engine->execlists.preempt_hang.count = 0;
1758
1759                 rq_a = spinner_create_request(&a.spin,
1760                                               a.ctx, engine,
1761                                               MI_ARB_CHECK);
1762                 if (IS_ERR(rq_a)) {
1763                         err = PTR_ERR(rq_a);
1764                         goto err_client_b;
1765                 }
1766
1767                 /* Low priority client, but unpreemptable! */
1768                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1769
1770                 i915_request_add(rq_a);
1771                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1772                         pr_err("First client failed to start\n");
1773                         goto err_wedged;
1774                 }
1775
1776                 rq_b = spinner_create_request(&b.spin,
1777                                               b.ctx, engine,
1778                                               MI_ARB_CHECK);
1779                 if (IS_ERR(rq_b)) {
1780                         err = PTR_ERR(rq_b);
1781                         goto err_client_b;
1782                 }
1783
1784                 i915_request_add(rq_b);
1785
1786                 /* B is much more important than A! (But A is unpreemptable.) */
1787                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1788
1789                 /* Wait long enough for preemption and timeslicing */
1790                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1791                         pr_err("Second client started too early!\n");
1792                         goto err_wedged;
1793                 }
1794
1795                 igt_spinner_end(&a.spin);
1796
1797                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1798                         pr_err("Second client failed to start\n");
1799                         goto err_wedged;
1800                 }
1801
1802                 igt_spinner_end(&b.spin);
1803
1804                 if (engine->execlists.preempt_hang.count) {
1805                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
1806                                engine->execlists.preempt_hang.count);
1807                         err = -EINVAL;
1808                         goto err_wedged;
1809                 }
1810
1811                 if (igt_flush_test(gt->i915))
1812                         goto err_wedged;
1813         }
1814
1815         err = 0;
1816 err_client_b:
1817         preempt_client_fini(&b);
1818 err_client_a:
1819         preempt_client_fini(&a);
1820         return err;
1821
1822 err_wedged:
1823         igt_spinner_end(&b.spin);
1824         igt_spinner_end(&a.spin);
1825         intel_gt_set_wedged(gt);
1826         err = -EIO;
1827         goto err_client_b;
1828 }
1829
1830 struct live_preempt_cancel {
1831         struct intel_engine_cs *engine;
1832         struct preempt_client a, b;
1833 };
1834
1835 static int __cancel_active0(struct live_preempt_cancel *arg)
1836 {
1837         struct i915_request *rq;
1838         struct igt_live_test t;
1839         int err;
1840
1841         /* Preempt cancel of ELSP0 */
1842         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1843         if (igt_live_test_begin(&t, arg->engine->i915,
1844                                 __func__, arg->engine->name))
1845                 return -EIO;
1846
1847         rq = spinner_create_request(&arg->a.spin,
1848                                     arg->a.ctx, arg->engine,
1849                                     MI_ARB_CHECK);
1850         if (IS_ERR(rq))
1851                 return PTR_ERR(rq);
1852
1853         clear_bit(CONTEXT_BANNED, &rq->context->flags);
1854         i915_request_get(rq);
1855         i915_request_add(rq);
1856         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1857                 err = -EIO;
1858                 goto out;
1859         }
1860
1861         intel_context_set_banned(rq->context);
1862         err = intel_engine_pulse(arg->engine);
1863         if (err)
1864                 goto out;
1865
1866         err = wait_for_reset(arg->engine, rq, HZ / 2);
1867         if (err) {
1868                 pr_err("Cancelled inflight0 request did not reset\n");
1869                 goto out;
1870         }
1871
1872 out:
1873         i915_request_put(rq);
1874         if (igt_live_test_end(&t))
1875                 err = -EIO;
1876         return err;
1877 }
1878
1879 static int __cancel_active1(struct live_preempt_cancel *arg)
1880 {
1881         struct i915_request *rq[2] = {};
1882         struct igt_live_test t;
1883         int err;
1884
1885         /* Preempt cancel of ELSP1 */
1886         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1887         if (igt_live_test_begin(&t, arg->engine->i915,
1888                                 __func__, arg->engine->name))
1889                 return -EIO;
1890
1891         rq[0] = spinner_create_request(&arg->a.spin,
1892                                        arg->a.ctx, arg->engine,
1893                                        MI_NOOP); /* no preemption */
1894         if (IS_ERR(rq[0]))
1895                 return PTR_ERR(rq[0]);
1896
1897         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1898         i915_request_get(rq[0]);
1899         i915_request_add(rq[0]);
1900         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1901                 err = -EIO;
1902                 goto out;
1903         }
1904
1905         rq[1] = spinner_create_request(&arg->b.spin,
1906                                        arg->b.ctx, arg->engine,
1907                                        MI_ARB_CHECK);
1908         if (IS_ERR(rq[1])) {
1909                 err = PTR_ERR(rq[1]);
1910                 goto out;
1911         }
1912
1913         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1914         i915_request_get(rq[1]);
1915         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1916         i915_request_add(rq[1]);
1917         if (err)
1918                 goto out;
1919
1920         intel_context_set_banned(rq[1]->context);
1921         err = intel_engine_pulse(arg->engine);
1922         if (err)
1923                 goto out;
1924
1925         igt_spinner_end(&arg->a.spin);
1926         err = wait_for_reset(arg->engine, rq[1], HZ / 2);
1927         if (err)
1928                 goto out;
1929
1930         if (rq[0]->fence.error != 0) {
1931                 pr_err("Normal inflight0 request did not complete\n");
1932                 err = -EINVAL;
1933                 goto out;
1934         }
1935
1936         if (rq[1]->fence.error != -EIO) {
1937                 pr_err("Cancelled inflight1 request did not report -EIO\n");
1938                 err = -EINVAL;
1939                 goto out;
1940         }
1941
1942 out:
1943         i915_request_put(rq[1]);
1944         i915_request_put(rq[0]);
1945         if (igt_live_test_end(&t))
1946                 err = -EIO;
1947         return err;
1948 }
1949
1950 static int __cancel_queued(struct live_preempt_cancel *arg)
1951 {
1952         struct i915_request *rq[3] = {};
1953         struct igt_live_test t;
1954         int err;
1955
1956         /* Full ELSP and one in the wings */
1957         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1958         if (igt_live_test_begin(&t, arg->engine->i915,
1959                                 __func__, arg->engine->name))
1960                 return -EIO;
1961
1962         rq[0] = spinner_create_request(&arg->a.spin,
1963                                        arg->a.ctx, arg->engine,
1964                                        MI_ARB_CHECK);
1965         if (IS_ERR(rq[0]))
1966                 return PTR_ERR(rq[0]);
1967
1968         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1969         i915_request_get(rq[0]);
1970         i915_request_add(rq[0]);
1971         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1972                 err = -EIO;
1973                 goto out;
1974         }
1975
1976         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1977         if (IS_ERR(rq[1])) {
1978                 err = PTR_ERR(rq[1]);
1979                 goto out;
1980         }
1981
1982         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1983         i915_request_get(rq[1]);
1984         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1985         i915_request_add(rq[1]);
1986         if (err)
1987                 goto out;
1988
1989         rq[2] = spinner_create_request(&arg->b.spin,
1990                                        arg->a.ctx, arg->engine,
1991                                        MI_ARB_CHECK);
1992         if (IS_ERR(rq[2])) {
1993                 err = PTR_ERR(rq[2]);
1994                 goto out;
1995         }
1996
1997         i915_request_get(rq[2]);
1998         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1999         i915_request_add(rq[2]);
2000         if (err)
2001                 goto out;
2002
2003         intel_context_set_banned(rq[2]->context);
2004         err = intel_engine_pulse(arg->engine);
2005         if (err)
2006                 goto out;
2007
2008         err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2009         if (err)
2010                 goto out;
2011
2012         if (rq[0]->fence.error != -EIO) {
2013                 pr_err("Cancelled inflight0 request did not report -EIO\n");
2014                 err = -EINVAL;
2015                 goto out;
2016         }
2017
2018         if (rq[1]->fence.error != 0) {
2019                 pr_err("Normal inflight1 request did not complete\n");
2020                 err = -EINVAL;
2021                 goto out;
2022         }
2023
2024         if (rq[2]->fence.error != -EIO) {
2025                 pr_err("Cancelled queued request did not report -EIO\n");
2026                 err = -EINVAL;
2027                 goto out;
2028         }
2029
2030 out:
2031         i915_request_put(rq[2]);
2032         i915_request_put(rq[1]);
2033         i915_request_put(rq[0]);
2034         if (igt_live_test_end(&t))
2035                 err = -EIO;
2036         return err;
2037 }
2038
2039 static int __cancel_hostile(struct live_preempt_cancel *arg)
2040 {
2041         struct i915_request *rq;
2042         int err;
2043
2044         /* Preempt cancel non-preemptible spinner in ELSP0 */
2045         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2046                 return 0;
2047
2048         if (!intel_has_reset_engine(arg->engine->gt))
2049                 return 0;
2050
2051         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2052         rq = spinner_create_request(&arg->a.spin,
2053                                     arg->a.ctx, arg->engine,
2054                                     MI_NOOP); /* preemption disabled */
2055         if (IS_ERR(rq))
2056                 return PTR_ERR(rq);
2057
2058         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2059         i915_request_get(rq);
2060         i915_request_add(rq);
2061         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2062                 err = -EIO;
2063                 goto out;
2064         }
2065
2066         intel_context_set_banned(rq->context);
2067         err = intel_engine_pulse(arg->engine); /* force reset */
2068         if (err)
2069                 goto out;
2070
2071         err = wait_for_reset(arg->engine, rq, HZ / 2);
2072         if (err) {
2073                 pr_err("Cancelled inflight0 request did not reset\n");
2074                 goto out;
2075         }
2076
2077 out:
2078         i915_request_put(rq);
2079         if (igt_flush_test(arg->engine->i915))
2080                 err = -EIO;
2081         return err;
2082 }
2083
2084 static int live_preempt_cancel(void *arg)
2085 {
2086         struct intel_gt *gt = arg;
2087         struct live_preempt_cancel data;
2088         enum intel_engine_id id;
2089         int err = -ENOMEM;
2090
2091         /*
2092          * To cancel an inflight context, we need to first remove it from the
2093          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2094          */
2095
2096         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2097                 return 0;
2098
2099         if (preempt_client_init(gt, &data.a))
2100                 return -ENOMEM;
2101         if (preempt_client_init(gt, &data.b))
2102                 goto err_client_a;
2103
2104         for_each_engine(data.engine, gt, id) {
2105                 if (!intel_engine_has_preemption(data.engine))
2106                         continue;
2107
2108                 err = __cancel_active0(&data);
2109                 if (err)
2110                         goto err_wedged;
2111
2112                 err = __cancel_active1(&data);
2113                 if (err)
2114                         goto err_wedged;
2115
2116                 err = __cancel_queued(&data);
2117                 if (err)
2118                         goto err_wedged;
2119
2120                 err = __cancel_hostile(&data);
2121                 if (err)
2122                         goto err_wedged;
2123         }
2124
2125         err = 0;
2126 err_client_b:
2127         preempt_client_fini(&data.b);
2128 err_client_a:
2129         preempt_client_fini(&data.a);
2130         return err;
2131
2132 err_wedged:
2133         GEM_TRACE_DUMP();
2134         igt_spinner_end(&data.b.spin);
2135         igt_spinner_end(&data.a.spin);
2136         intel_gt_set_wedged(gt);
2137         goto err_client_b;
2138 }
2139
2140 static int live_suppress_self_preempt(void *arg)
2141 {
2142         struct intel_gt *gt = arg;
2143         struct intel_engine_cs *engine;
2144         struct i915_sched_attr attr = {
2145                 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2146         };
2147         struct preempt_client a, b;
2148         enum intel_engine_id id;
2149         int err = -ENOMEM;
2150
2151         /*
2152          * Verify that if a preemption request does not cause a change in
2153          * the current execution order, the preempt-to-idle injection is
2154          * skipped and that we do not accidentally apply it after the CS
2155          * completion event.
2156          */
2157
2158         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2159                 return 0;
2160
2161         if (intel_uc_uses_guc_submission(&gt->uc))
2162                 return 0; /* presume black blox */
2163
2164         if (intel_vgpu_active(gt->i915))
2165                 return 0; /* GVT forces single port & request submission */
2166
2167         if (preempt_client_init(gt, &a))
2168                 return -ENOMEM;
2169         if (preempt_client_init(gt, &b))
2170                 goto err_client_a;
2171
2172         for_each_engine(engine, gt, id) {
2173                 struct i915_request *rq_a, *rq_b;
2174                 int depth;
2175
2176                 if (!intel_engine_has_preemption(engine))
2177                         continue;
2178
2179                 if (igt_flush_test(gt->i915))
2180                         goto err_wedged;
2181
2182                 intel_engine_pm_get(engine);
2183                 engine->execlists.preempt_hang.count = 0;
2184
2185                 rq_a = spinner_create_request(&a.spin,
2186                                               a.ctx, engine,
2187                                               MI_NOOP);
2188                 if (IS_ERR(rq_a)) {
2189                         err = PTR_ERR(rq_a);
2190                         intel_engine_pm_put(engine);
2191                         goto err_client_b;
2192                 }
2193
2194                 i915_request_add(rq_a);
2195                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2196                         pr_err("First client failed to start\n");
2197                         intel_engine_pm_put(engine);
2198                         goto err_wedged;
2199                 }
2200
2201                 /* Keep postponing the timer to avoid premature slicing */
2202                 mod_timer(&engine->execlists.timer, jiffies + HZ);
2203                 for (depth = 0; depth < 8; depth++) {
2204                         rq_b = spinner_create_request(&b.spin,
2205                                                       b.ctx, engine,
2206                                                       MI_NOOP);
2207                         if (IS_ERR(rq_b)) {
2208                                 err = PTR_ERR(rq_b);
2209                                 intel_engine_pm_put(engine);
2210                                 goto err_client_b;
2211                         }
2212                         i915_request_add(rq_b);
2213
2214                         GEM_BUG_ON(i915_request_completed(rq_a));
2215                         engine->schedule(rq_a, &attr);
2216                         igt_spinner_end(&a.spin);
2217
2218                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2219                                 pr_err("Second client failed to start\n");
2220                                 intel_engine_pm_put(engine);
2221                                 goto err_wedged;
2222                         }
2223
2224                         swap(a, b);
2225                         rq_a = rq_b;
2226                 }
2227                 igt_spinner_end(&a.spin);
2228
2229                 if (engine->execlists.preempt_hang.count) {
2230                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2231                                engine->name,
2232                                engine->execlists.preempt_hang.count,
2233                                depth);
2234                         intel_engine_pm_put(engine);
2235                         err = -EINVAL;
2236                         goto err_client_b;
2237                 }
2238
2239                 intel_engine_pm_put(engine);
2240                 if (igt_flush_test(gt->i915))
2241                         goto err_wedged;
2242         }
2243
2244         err = 0;
2245 err_client_b:
2246         preempt_client_fini(&b);
2247 err_client_a:
2248         preempt_client_fini(&a);
2249         return err;
2250
2251 err_wedged:
2252         igt_spinner_end(&b.spin);
2253         igt_spinner_end(&a.spin);
2254         intel_gt_set_wedged(gt);
2255         err = -EIO;
2256         goto err_client_b;
2257 }
2258
2259 static int __i915_sw_fence_call
2260 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2261 {
2262         return NOTIFY_DONE;
2263 }
2264
2265 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2266 {
2267         struct i915_request *rq;
2268
2269         rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2270         if (!rq)
2271                 return NULL;
2272
2273         rq->engine = engine;
2274
2275         spin_lock_init(&rq->lock);
2276         INIT_LIST_HEAD(&rq->fence.cb_list);
2277         rq->fence.lock = &rq->lock;
2278         rq->fence.ops = &i915_fence_ops;
2279
2280         i915_sched_node_init(&rq->sched);
2281
2282         /* mark this request as permanently incomplete */
2283         rq->fence.seqno = 1;
2284         BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2285         rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2286         GEM_BUG_ON(i915_request_completed(rq));
2287
2288         i915_sw_fence_init(&rq->submit, dummy_notify);
2289         set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2290
2291         spin_lock_init(&rq->lock);
2292         rq->fence.lock = &rq->lock;
2293         INIT_LIST_HEAD(&rq->fence.cb_list);
2294
2295         return rq;
2296 }
2297
2298 static void dummy_request_free(struct i915_request *dummy)
2299 {
2300         /* We have to fake the CS interrupt to kick the next request */
2301         i915_sw_fence_commit(&dummy->submit);
2302
2303         i915_request_mark_complete(dummy);
2304         dma_fence_signal(&dummy->fence);
2305
2306         i915_sched_node_fini(&dummy->sched);
2307         i915_sw_fence_fini(&dummy->submit);
2308
2309         dma_fence_free(&dummy->fence);
2310 }
2311
2312 static int live_suppress_wait_preempt(void *arg)
2313 {
2314         struct intel_gt *gt = arg;
2315         struct preempt_client client[4];
2316         struct i915_request *rq[ARRAY_SIZE(client)] = {};
2317         struct intel_engine_cs *engine;
2318         enum intel_engine_id id;
2319         int err = -ENOMEM;
2320         int i;
2321
2322         /*
2323          * Waiters are given a little priority nudge, but not enough
2324          * to actually cause any preemption. Double check that we do
2325          * not needlessly generate preempt-to-idle cycles.
2326          */
2327
2328         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2329                 return 0;
2330
2331         if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2332                 return -ENOMEM;
2333         if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2334                 goto err_client_0;
2335         if (preempt_client_init(gt, &client[2])) /* head of queue */
2336                 goto err_client_1;
2337         if (preempt_client_init(gt, &client[3])) /* bystander */
2338                 goto err_client_2;
2339
2340         for_each_engine(engine, gt, id) {
2341                 int depth;
2342
2343                 if (!intel_engine_has_preemption(engine))
2344                         continue;
2345
2346                 if (!engine->emit_init_breadcrumb)
2347                         continue;
2348
2349                 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2350                         struct i915_request *dummy;
2351
2352                         engine->execlists.preempt_hang.count = 0;
2353
2354                         dummy = dummy_request(engine);
2355                         if (!dummy)
2356                                 goto err_client_3;
2357
2358                         for (i = 0; i < ARRAY_SIZE(client); i++) {
2359                                 struct i915_request *this;
2360
2361                                 this = spinner_create_request(&client[i].spin,
2362                                                               client[i].ctx, engine,
2363                                                               MI_NOOP);
2364                                 if (IS_ERR(this)) {
2365                                         err = PTR_ERR(this);
2366                                         goto err_wedged;
2367                                 }
2368
2369                                 /* Disable NEWCLIENT promotion */
2370                                 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2371                                                         &dummy->fence);
2372
2373                                 rq[i] = i915_request_get(this);
2374                                 i915_request_add(this);
2375                         }
2376
2377                         dummy_request_free(dummy);
2378
2379                         GEM_BUG_ON(i915_request_completed(rq[0]));
2380                         if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2381                                 pr_err("%s: First client failed to start\n",
2382                                        engine->name);
2383                                 goto err_wedged;
2384                         }
2385                         GEM_BUG_ON(!i915_request_started(rq[0]));
2386
2387                         if (i915_request_wait(rq[depth],
2388                                               I915_WAIT_PRIORITY,
2389                                               1) != -ETIME) {
2390                                 pr_err("%s: Waiter depth:%d completed!\n",
2391                                        engine->name, depth);
2392                                 goto err_wedged;
2393                         }
2394
2395                         for (i = 0; i < ARRAY_SIZE(client); i++) {
2396                                 igt_spinner_end(&client[i].spin);
2397                                 i915_request_put(rq[i]);
2398                                 rq[i] = NULL;
2399                         }
2400
2401                         if (igt_flush_test(gt->i915))
2402                                 goto err_wedged;
2403
2404                         if (engine->execlists.preempt_hang.count) {
2405                                 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2406                                        engine->name,
2407                                        engine->execlists.preempt_hang.count,
2408                                        depth);
2409                                 err = -EINVAL;
2410                                 goto err_client_3;
2411                         }
2412                 }
2413         }
2414
2415         err = 0;
2416 err_client_3:
2417         preempt_client_fini(&client[3]);
2418 err_client_2:
2419         preempt_client_fini(&client[2]);
2420 err_client_1:
2421         preempt_client_fini(&client[1]);
2422 err_client_0:
2423         preempt_client_fini(&client[0]);
2424         return err;
2425
2426 err_wedged:
2427         for (i = 0; i < ARRAY_SIZE(client); i++) {
2428                 igt_spinner_end(&client[i].spin);
2429                 i915_request_put(rq[i]);
2430         }
2431         intel_gt_set_wedged(gt);
2432         err = -EIO;
2433         goto err_client_3;
2434 }
2435
2436 static int live_chain_preempt(void *arg)
2437 {
2438         struct intel_gt *gt = arg;
2439         struct intel_engine_cs *engine;
2440         struct preempt_client hi, lo;
2441         enum intel_engine_id id;
2442         int err = -ENOMEM;
2443
2444         /*
2445          * Build a chain AB...BA between two contexts (A, B) and request
2446          * preemption of the last request. It should then complete before
2447          * the previously submitted spinner in B.
2448          */
2449
2450         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2451                 return 0;
2452
2453         if (preempt_client_init(gt, &hi))
2454                 return -ENOMEM;
2455
2456         if (preempt_client_init(gt, &lo))
2457                 goto err_client_hi;
2458
2459         for_each_engine(engine, gt, id) {
2460                 struct i915_sched_attr attr = {
2461                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2462                 };
2463                 struct igt_live_test t;
2464                 struct i915_request *rq;
2465                 int ring_size, count, i;
2466
2467                 if (!intel_engine_has_preemption(engine))
2468                         continue;
2469
2470                 rq = spinner_create_request(&lo.spin,
2471                                             lo.ctx, engine,
2472                                             MI_ARB_CHECK);
2473                 if (IS_ERR(rq))
2474                         goto err_wedged;
2475
2476                 i915_request_get(rq);
2477                 i915_request_add(rq);
2478
2479                 ring_size = rq->wa_tail - rq->head;
2480                 if (ring_size < 0)
2481                         ring_size += rq->ring->size;
2482                 ring_size = rq->ring->size / ring_size;
2483                 pr_debug("%s(%s): Using maximum of %d requests\n",
2484                          __func__, engine->name, ring_size);
2485
2486                 igt_spinner_end(&lo.spin);
2487                 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2488                         pr_err("Timed out waiting to flush %s\n", engine->name);
2489                         i915_request_put(rq);
2490                         goto err_wedged;
2491                 }
2492                 i915_request_put(rq);
2493
2494                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2495                         err = -EIO;
2496                         goto err_wedged;
2497                 }
2498
2499                 for_each_prime_number_from(count, 1, ring_size) {
2500                         rq = spinner_create_request(&hi.spin,
2501                                                     hi.ctx, engine,
2502                                                     MI_ARB_CHECK);
2503                         if (IS_ERR(rq))
2504                                 goto err_wedged;
2505                         i915_request_add(rq);
2506                         if (!igt_wait_for_spinner(&hi.spin, rq))
2507                                 goto err_wedged;
2508
2509                         rq = spinner_create_request(&lo.spin,
2510                                                     lo.ctx, engine,
2511                                                     MI_ARB_CHECK);
2512                         if (IS_ERR(rq))
2513                                 goto err_wedged;
2514                         i915_request_add(rq);
2515
2516                         for (i = 0; i < count; i++) {
2517                                 rq = igt_request_alloc(lo.ctx, engine);
2518                                 if (IS_ERR(rq))
2519                                         goto err_wedged;
2520                                 i915_request_add(rq);
2521                         }
2522
2523                         rq = igt_request_alloc(hi.ctx, engine);
2524                         if (IS_ERR(rq))
2525                                 goto err_wedged;
2526
2527                         i915_request_get(rq);
2528                         i915_request_add(rq);
2529                         engine->schedule(rq, &attr);
2530
2531                         igt_spinner_end(&hi.spin);
2532                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2533                                 struct drm_printer p =
2534                                         drm_info_printer(gt->i915->drm.dev);
2535
2536                                 pr_err("Failed to preempt over chain of %d\n",
2537                                        count);
2538                                 intel_engine_dump(engine, &p,
2539                                                   "%s\n", engine->name);
2540                                 i915_request_put(rq);
2541                                 goto err_wedged;
2542                         }
2543                         igt_spinner_end(&lo.spin);
2544                         i915_request_put(rq);
2545
2546                         rq = igt_request_alloc(lo.ctx, engine);
2547                         if (IS_ERR(rq))
2548                                 goto err_wedged;
2549
2550                         i915_request_get(rq);
2551                         i915_request_add(rq);
2552
2553                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2554                                 struct drm_printer p =
2555                                         drm_info_printer(gt->i915->drm.dev);
2556
2557                                 pr_err("Failed to flush low priority chain of %d requests\n",
2558                                        count);
2559                                 intel_engine_dump(engine, &p,
2560                                                   "%s\n", engine->name);
2561
2562                                 i915_request_put(rq);
2563                                 goto err_wedged;
2564                         }
2565                         i915_request_put(rq);
2566                 }
2567
2568                 if (igt_live_test_end(&t)) {
2569                         err = -EIO;
2570                         goto err_wedged;
2571                 }
2572         }
2573
2574         err = 0;
2575 err_client_lo:
2576         preempt_client_fini(&lo);
2577 err_client_hi:
2578         preempt_client_fini(&hi);
2579         return err;
2580
2581 err_wedged:
2582         igt_spinner_end(&hi.spin);
2583         igt_spinner_end(&lo.spin);
2584         intel_gt_set_wedged(gt);
2585         err = -EIO;
2586         goto err_client_lo;
2587 }
2588
2589 static int create_gang(struct intel_engine_cs *engine,
2590                        struct i915_request **prev)
2591 {
2592         struct drm_i915_gem_object *obj;
2593         struct intel_context *ce;
2594         struct i915_request *rq;
2595         struct i915_vma *vma;
2596         u32 *cs;
2597         int err;
2598
2599         ce = intel_context_create(engine);
2600         if (IS_ERR(ce))
2601                 return PTR_ERR(ce);
2602
2603         obj = i915_gem_object_create_internal(engine->i915, 4096);
2604         if (IS_ERR(obj)) {
2605                 err = PTR_ERR(obj);
2606                 goto err_ce;
2607         }
2608
2609         vma = i915_vma_instance(obj, ce->vm, NULL);
2610         if (IS_ERR(vma)) {
2611                 err = PTR_ERR(vma);
2612                 goto err_obj;
2613         }
2614
2615         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2616         if (err)
2617                 goto err_obj;
2618
2619         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2620         if (IS_ERR(cs))
2621                 goto err_obj;
2622
2623         /* Semaphore target: spin until zero */
2624         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2625
2626         *cs++ = MI_SEMAPHORE_WAIT |
2627                 MI_SEMAPHORE_POLL |
2628                 MI_SEMAPHORE_SAD_EQ_SDD;
2629         *cs++ = 0;
2630         *cs++ = lower_32_bits(vma->node.start);
2631         *cs++ = upper_32_bits(vma->node.start);
2632
2633         if (*prev) {
2634                 u64 offset = (*prev)->batch->node.start;
2635
2636                 /* Terminate the spinner in the next lower priority batch. */
2637                 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2638                 *cs++ = lower_32_bits(offset);
2639                 *cs++ = upper_32_bits(offset);
2640                 *cs++ = 0;
2641         }
2642
2643         *cs++ = MI_BATCH_BUFFER_END;
2644         i915_gem_object_flush_map(obj);
2645         i915_gem_object_unpin_map(obj);
2646
2647         rq = intel_context_create_request(ce);
2648         if (IS_ERR(rq))
2649                 goto err_obj;
2650
2651         rq->batch = i915_vma_get(vma);
2652         i915_request_get(rq);
2653
2654         i915_vma_lock(vma);
2655         err = i915_request_await_object(rq, vma->obj, false);
2656         if (!err)
2657                 err = i915_vma_move_to_active(vma, rq, 0);
2658         if (!err)
2659                 err = rq->engine->emit_bb_start(rq,
2660                                                 vma->node.start,
2661                                                 PAGE_SIZE, 0);
2662         i915_vma_unlock(vma);
2663         i915_request_add(rq);
2664         if (err)
2665                 goto err_rq;
2666
2667         i915_gem_object_put(obj);
2668         intel_context_put(ce);
2669
2670         rq->client_link.next = &(*prev)->client_link;
2671         *prev = rq;
2672         return 0;
2673
2674 err_rq:
2675         i915_vma_put(rq->batch);
2676         i915_request_put(rq);
2677 err_obj:
2678         i915_gem_object_put(obj);
2679 err_ce:
2680         intel_context_put(ce);
2681         return err;
2682 }
2683
2684 static int live_preempt_gang(void *arg)
2685 {
2686         struct intel_gt *gt = arg;
2687         struct intel_engine_cs *engine;
2688         enum intel_engine_id id;
2689
2690         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2691                 return 0;
2692
2693         /*
2694          * Build as long a chain of preempters as we can, with each
2695          * request higher priority than the last. Once we are ready, we release
2696          * the last batch which then precolates down the chain, each releasing
2697          * the next oldest in turn. The intent is to simply push as hard as we
2698          * can with the number of preemptions, trying to exceed narrow HW
2699          * limits. At a minimum, we insist that we can sort all the user
2700          * high priority levels into execution order.
2701          */
2702
2703         for_each_engine(engine, gt, id) {
2704                 struct i915_request *rq = NULL;
2705                 struct igt_live_test t;
2706                 IGT_TIMEOUT(end_time);
2707                 int prio = 0;
2708                 int err = 0;
2709                 u32 *cs;
2710
2711                 if (!intel_engine_has_preemption(engine))
2712                         continue;
2713
2714                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2715                         return -EIO;
2716
2717                 do {
2718                         struct i915_sched_attr attr = {
2719                                 .priority = I915_USER_PRIORITY(prio++),
2720                         };
2721
2722                         err = create_gang(engine, &rq);
2723                         if (err)
2724                                 break;
2725
2726                         /* Submit each spinner at increasing priority */
2727                         engine->schedule(rq, &attr);
2728
2729                         if (prio <= I915_PRIORITY_MAX)
2730                                 continue;
2731
2732                         if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2733                                 break;
2734
2735                         if (__igt_timeout(end_time, NULL))
2736                                 break;
2737                 } while (1);
2738                 pr_debug("%s: Preempt chain of %d requests\n",
2739                          engine->name, prio);
2740
2741                 /*
2742                  * Such that the last spinner is the highest priority and
2743                  * should execute first. When that spinner completes,
2744                  * it will terminate the next lowest spinner until there
2745                  * are no more spinners and the gang is complete.
2746                  */
2747                 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2748                 if (!IS_ERR(cs)) {
2749                         *cs = 0;
2750                         i915_gem_object_unpin_map(rq->batch->obj);
2751                 } else {
2752                         err = PTR_ERR(cs);
2753                         intel_gt_set_wedged(gt);
2754                 }
2755
2756                 while (rq) { /* wait for each rq from highest to lowest prio */
2757                         struct i915_request *n =
2758                                 list_next_entry(rq, client_link);
2759
2760                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2761                                 struct drm_printer p =
2762                                         drm_info_printer(engine->i915->drm.dev);
2763
2764                                 pr_err("Failed to flush chain of %d requests, at %d\n",
2765                                        prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2766                                 intel_engine_dump(engine, &p,
2767                                                   "%s\n", engine->name);
2768
2769                                 err = -ETIME;
2770                         }
2771
2772                         i915_vma_put(rq->batch);
2773                         i915_request_put(rq);
2774                         rq = n;
2775                 }
2776
2777                 if (igt_live_test_end(&t))
2778                         err = -EIO;
2779                 if (err)
2780                         return err;
2781         }
2782
2783         return 0;
2784 }
2785
2786 static struct i915_vma *
2787 create_gpr_user(struct intel_engine_cs *engine,
2788                 struct i915_vma *result,
2789                 unsigned int offset)
2790 {
2791         struct drm_i915_gem_object *obj;
2792         struct i915_vma *vma;
2793         u32 *cs;
2794         int err;
2795         int i;
2796
2797         obj = i915_gem_object_create_internal(engine->i915, 4096);
2798         if (IS_ERR(obj))
2799                 return ERR_CAST(obj);
2800
2801         vma = i915_vma_instance(obj, result->vm, NULL);
2802         if (IS_ERR(vma)) {
2803                 i915_gem_object_put(obj);
2804                 return vma;
2805         }
2806
2807         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2808         if (err) {
2809                 i915_vma_put(vma);
2810                 return ERR_PTR(err);
2811         }
2812
2813         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2814         if (IS_ERR(cs)) {
2815                 i915_vma_put(vma);
2816                 return ERR_CAST(cs);
2817         }
2818
2819         /* All GPR are clear for new contexts. We use GPR(0) as a constant */
2820         *cs++ = MI_LOAD_REGISTER_IMM(1);
2821         *cs++ = CS_GPR(engine, 0);
2822         *cs++ = 1;
2823
2824         for (i = 1; i < NUM_GPR; i++) {
2825                 u64 addr;
2826
2827                 /*
2828                  * Perform: GPR[i]++
2829                  *
2830                  * As we read and write into the context saved GPR[i], if
2831                  * we restart this batch buffer from an earlier point, we
2832                  * will repeat the increment and store a value > 1.
2833                  */
2834                 *cs++ = MI_MATH(4);
2835                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
2836                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
2837                 *cs++ = MI_MATH_ADD;
2838                 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
2839
2840                 addr = result->node.start + offset + i * sizeof(*cs);
2841                 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
2842                 *cs++ = CS_GPR(engine, 2 * i);
2843                 *cs++ = lower_32_bits(addr);
2844                 *cs++ = upper_32_bits(addr);
2845
2846                 *cs++ = MI_SEMAPHORE_WAIT |
2847                         MI_SEMAPHORE_POLL |
2848                         MI_SEMAPHORE_SAD_GTE_SDD;
2849                 *cs++ = i;
2850                 *cs++ = lower_32_bits(result->node.start);
2851                 *cs++ = upper_32_bits(result->node.start);
2852         }
2853
2854         *cs++ = MI_BATCH_BUFFER_END;
2855         i915_gem_object_flush_map(obj);
2856         i915_gem_object_unpin_map(obj);
2857
2858         return vma;
2859 }
2860
2861 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
2862 {
2863         struct drm_i915_gem_object *obj;
2864         struct i915_vma *vma;
2865         int err;
2866
2867         obj = i915_gem_object_create_internal(gt->i915, sz);
2868         if (IS_ERR(obj))
2869                 return ERR_CAST(obj);
2870
2871         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
2872         if (IS_ERR(vma)) {
2873                 i915_gem_object_put(obj);
2874                 return vma;
2875         }
2876
2877         err = i915_ggtt_pin(vma, 0, 0);
2878         if (err) {
2879                 i915_vma_put(vma);
2880                 return ERR_PTR(err);
2881         }
2882
2883         return vma;
2884 }
2885
2886 static struct i915_request *
2887 create_gpr_client(struct intel_engine_cs *engine,
2888                   struct i915_vma *global,
2889                   unsigned int offset)
2890 {
2891         struct i915_vma *batch, *vma;
2892         struct intel_context *ce;
2893         struct i915_request *rq;
2894         int err;
2895
2896         ce = intel_context_create(engine);
2897         if (IS_ERR(ce))
2898                 return ERR_CAST(ce);
2899
2900         vma = i915_vma_instance(global->obj, ce->vm, NULL);
2901         if (IS_ERR(vma)) {
2902                 err = PTR_ERR(vma);
2903                 goto out_ce;
2904         }
2905
2906         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2907         if (err)
2908                 goto out_ce;
2909
2910         batch = create_gpr_user(engine, vma, offset);
2911         if (IS_ERR(batch)) {
2912                 err = PTR_ERR(batch);
2913                 goto out_vma;
2914         }
2915
2916         rq = intel_context_create_request(ce);
2917         if (IS_ERR(rq)) {
2918                 err = PTR_ERR(rq);
2919                 goto out_batch;
2920         }
2921
2922         i915_vma_lock(vma);
2923         err = i915_request_await_object(rq, vma->obj, false);
2924         if (!err)
2925                 err = i915_vma_move_to_active(vma, rq, 0);
2926         i915_vma_unlock(vma);
2927
2928         i915_vma_lock(batch);
2929         if (!err)
2930                 err = i915_request_await_object(rq, batch->obj, false);
2931         if (!err)
2932                 err = i915_vma_move_to_active(batch, rq, 0);
2933         if (!err)
2934                 err = rq->engine->emit_bb_start(rq,
2935                                                 batch->node.start,
2936                                                 PAGE_SIZE, 0);
2937         i915_vma_unlock(batch);
2938         i915_vma_unpin(batch);
2939
2940         if (!err)
2941                 i915_request_get(rq);
2942         i915_request_add(rq);
2943
2944 out_batch:
2945         i915_vma_put(batch);
2946 out_vma:
2947         i915_vma_unpin(vma);
2948 out_ce:
2949         intel_context_put(ce);
2950         return err ? ERR_PTR(err) : rq;
2951 }
2952
2953 static int preempt_user(struct intel_engine_cs *engine,
2954                         struct i915_vma *global,
2955                         int id)
2956 {
2957         struct i915_sched_attr attr = {
2958                 .priority = I915_PRIORITY_MAX
2959         };
2960         struct i915_request *rq;
2961         int err = 0;
2962         u32 *cs;
2963
2964         rq = intel_engine_create_kernel_request(engine);
2965         if (IS_ERR(rq))
2966                 return PTR_ERR(rq);
2967
2968         cs = intel_ring_begin(rq, 4);
2969         if (IS_ERR(cs)) {
2970                 i915_request_add(rq);
2971                 return PTR_ERR(cs);
2972         }
2973
2974         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2975         *cs++ = i915_ggtt_offset(global);
2976         *cs++ = 0;
2977         *cs++ = id;
2978
2979         intel_ring_advance(rq, cs);
2980
2981         i915_request_get(rq);
2982         i915_request_add(rq);
2983
2984         engine->schedule(rq, &attr);
2985
2986         if (i915_request_wait(rq, 0, HZ / 2) < 0)
2987                 err = -ETIME;
2988         i915_request_put(rq);
2989
2990         return err;
2991 }
2992
2993 static int live_preempt_user(void *arg)
2994 {
2995         struct intel_gt *gt = arg;
2996         struct intel_engine_cs *engine;
2997         struct i915_vma *global;
2998         enum intel_engine_id id;
2999         u32 *result;
3000         int err = 0;
3001
3002         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3003                 return 0;
3004
3005         /*
3006          * In our other tests, we look at preemption in carefully
3007          * controlled conditions in the ringbuffer. Since most of the
3008          * time is spent in user batches, most of our preemptions naturally
3009          * occur there. We want to verify that when we preempt inside a batch
3010          * we continue on from the current instruction and do not roll back
3011          * to the start, or another earlier arbitration point.
3012          *
3013          * To verify this, we create a batch which is a mixture of
3014          * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3015          * a few preempting contexts thrown into the mix, we look for any
3016          * repeated instructions (which show up as incorrect values).
3017          */
3018
3019         global = create_global(gt, 4096);
3020         if (IS_ERR(global))
3021                 return PTR_ERR(global);
3022
3023         result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3024         if (IS_ERR(result)) {
3025                 i915_vma_unpin_and_release(&global, 0);
3026                 return PTR_ERR(result);
3027         }
3028
3029         for_each_engine(engine, gt, id) {
3030                 struct i915_request *client[3] = {};
3031                 struct igt_live_test t;
3032                 int i;
3033
3034                 if (!intel_engine_has_preemption(engine))
3035                         continue;
3036
3037                 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3038                         continue; /* we need per-context GPR */
3039
3040                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3041                         err = -EIO;
3042                         break;
3043                 }
3044
3045                 memset(result, 0, 4096);
3046
3047                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3048                         struct i915_request *rq;
3049
3050                         rq = create_gpr_client(engine, global,
3051                                                NUM_GPR * i * sizeof(u32));
3052                         if (IS_ERR(rq))
3053                                 goto end_test;
3054
3055                         client[i] = rq;
3056                 }
3057
3058                 /* Continuously preempt the set of 3 running contexts */
3059                 for (i = 1; i <= NUM_GPR; i++) {
3060                         err = preempt_user(engine, global, i);
3061                         if (err)
3062                                 goto end_test;
3063                 }
3064
3065                 if (READ_ONCE(result[0]) != NUM_GPR) {
3066                         pr_err("%s: Failed to release semaphore\n",
3067                                engine->name);
3068                         err = -EIO;
3069                         goto end_test;
3070                 }
3071
3072                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3073                         int gpr;
3074
3075                         if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3076                                 err = -ETIME;
3077                                 goto end_test;
3078                         }
3079
3080                         for (gpr = 1; gpr < NUM_GPR; gpr++) {
3081                                 if (result[NUM_GPR * i + gpr] != 1) {
3082                                         pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3083                                                engine->name,
3084                                                i, gpr, result[NUM_GPR * i + gpr]);
3085                                         err = -EINVAL;
3086                                         goto end_test;
3087                                 }
3088                         }
3089                 }
3090
3091 end_test:
3092                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3093                         if (!client[i])
3094                                 break;
3095
3096                         i915_request_put(client[i]);
3097                 }
3098
3099                 /* Flush the semaphores on error */
3100                 smp_store_mb(result[0], -1);
3101                 if (igt_live_test_end(&t))
3102                         err = -EIO;
3103                 if (err)
3104                         break;
3105         }
3106
3107         i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3108         return err;
3109 }
3110
3111 static int live_preempt_timeout(void *arg)
3112 {
3113         struct intel_gt *gt = arg;
3114         struct i915_gem_context *ctx_hi, *ctx_lo;
3115         struct igt_spinner spin_lo;
3116         struct intel_engine_cs *engine;
3117         enum intel_engine_id id;
3118         int err = -ENOMEM;
3119
3120         /*
3121          * Check that we force preemption to occur by cancelling the previous
3122          * context if it refuses to yield the GPU.
3123          */
3124         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3125                 return 0;
3126
3127         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3128                 return 0;
3129
3130         if (!intel_has_reset_engine(gt))
3131                 return 0;
3132
3133         if (igt_spinner_init(&spin_lo, gt))
3134                 return -ENOMEM;
3135
3136         ctx_hi = kernel_context(gt->i915);
3137         if (!ctx_hi)
3138                 goto err_spin_lo;
3139         ctx_hi->sched.priority =
3140                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3141
3142         ctx_lo = kernel_context(gt->i915);
3143         if (!ctx_lo)
3144                 goto err_ctx_hi;
3145         ctx_lo->sched.priority =
3146                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3147
3148         for_each_engine(engine, gt, id) {
3149                 unsigned long saved_timeout;
3150                 struct i915_request *rq;
3151
3152                 if (!intel_engine_has_preemption(engine))
3153                         continue;
3154
3155                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3156                                             MI_NOOP); /* preemption disabled */
3157                 if (IS_ERR(rq)) {
3158                         err = PTR_ERR(rq);
3159                         goto err_ctx_lo;
3160                 }
3161
3162                 i915_request_add(rq);
3163                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3164                         intel_gt_set_wedged(gt);
3165                         err = -EIO;
3166                         goto err_ctx_lo;
3167                 }
3168
3169                 rq = igt_request_alloc(ctx_hi, engine);
3170                 if (IS_ERR(rq)) {
3171                         igt_spinner_end(&spin_lo);
3172                         err = PTR_ERR(rq);
3173                         goto err_ctx_lo;
3174                 }
3175
3176                 /* Flush the previous CS ack before changing timeouts */
3177                 while (READ_ONCE(engine->execlists.pending[0]))
3178                         cpu_relax();
3179
3180                 saved_timeout = engine->props.preempt_timeout_ms;
3181                 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3182
3183                 i915_request_get(rq);
3184                 i915_request_add(rq);
3185
3186                 intel_engine_flush_submission(engine);
3187                 engine->props.preempt_timeout_ms = saved_timeout;
3188
3189                 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3190                         intel_gt_set_wedged(gt);
3191                         i915_request_put(rq);
3192                         err = -ETIME;
3193                         goto err_ctx_lo;
3194                 }
3195
3196                 igt_spinner_end(&spin_lo);
3197                 i915_request_put(rq);
3198         }
3199
3200         err = 0;
3201 err_ctx_lo:
3202         kernel_context_close(ctx_lo);
3203 err_ctx_hi:
3204         kernel_context_close(ctx_hi);
3205 err_spin_lo:
3206         igt_spinner_fini(&spin_lo);
3207         return err;
3208 }
3209
3210 static int random_range(struct rnd_state *rnd, int min, int max)
3211 {
3212         return i915_prandom_u32_max_state(max - min, rnd) + min;
3213 }
3214
3215 static int random_priority(struct rnd_state *rnd)
3216 {
3217         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3218 }
3219
3220 struct preempt_smoke {
3221         struct intel_gt *gt;
3222         struct i915_gem_context **contexts;
3223         struct intel_engine_cs *engine;
3224         struct drm_i915_gem_object *batch;
3225         unsigned int ncontext;
3226         struct rnd_state prng;
3227         unsigned long count;
3228 };
3229
3230 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3231 {
3232         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3233                                                           &smoke->prng)];
3234 }
3235
3236 static int smoke_submit(struct preempt_smoke *smoke,
3237                         struct i915_gem_context *ctx, int prio,
3238                         struct drm_i915_gem_object *batch)
3239 {
3240         struct i915_request *rq;
3241         struct i915_vma *vma = NULL;
3242         int err = 0;
3243
3244         if (batch) {
3245                 struct i915_address_space *vm;
3246
3247                 vm = i915_gem_context_get_vm_rcu(ctx);
3248                 vma = i915_vma_instance(batch, vm, NULL);
3249                 i915_vm_put(vm);
3250                 if (IS_ERR(vma))
3251                         return PTR_ERR(vma);
3252
3253                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3254                 if (err)
3255                         return err;
3256         }
3257
3258         ctx->sched.priority = prio;
3259
3260         rq = igt_request_alloc(ctx, smoke->engine);
3261         if (IS_ERR(rq)) {
3262                 err = PTR_ERR(rq);
3263                 goto unpin;
3264         }
3265
3266         if (vma) {
3267                 i915_vma_lock(vma);
3268                 err = i915_request_await_object(rq, vma->obj, false);
3269                 if (!err)
3270                         err = i915_vma_move_to_active(vma, rq, 0);
3271                 if (!err)
3272                         err = rq->engine->emit_bb_start(rq,
3273                                                         vma->node.start,
3274                                                         PAGE_SIZE, 0);
3275                 i915_vma_unlock(vma);
3276         }
3277
3278         i915_request_add(rq);
3279
3280 unpin:
3281         if (vma)
3282                 i915_vma_unpin(vma);
3283
3284         return err;
3285 }
3286
3287 static int smoke_crescendo_thread(void *arg)
3288 {
3289         struct preempt_smoke *smoke = arg;
3290         IGT_TIMEOUT(end_time);
3291         unsigned long count;
3292
3293         count = 0;
3294         do {
3295                 struct i915_gem_context *ctx = smoke_context(smoke);
3296                 int err;
3297
3298                 err = smoke_submit(smoke,
3299                                    ctx, count % I915_PRIORITY_MAX,
3300                                    smoke->batch);
3301                 if (err)
3302                         return err;
3303
3304                 count++;
3305         } while (!__igt_timeout(end_time, NULL));
3306
3307         smoke->count = count;
3308         return 0;
3309 }
3310
3311 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3312 #define BATCH BIT(0)
3313 {
3314         struct task_struct *tsk[I915_NUM_ENGINES] = {};
3315         struct preempt_smoke arg[I915_NUM_ENGINES];
3316         struct intel_engine_cs *engine;
3317         enum intel_engine_id id;
3318         unsigned long count;
3319         int err = 0;
3320
3321         for_each_engine(engine, smoke->gt, id) {
3322                 arg[id] = *smoke;
3323                 arg[id].engine = engine;
3324                 if (!(flags & BATCH))
3325                         arg[id].batch = NULL;
3326                 arg[id].count = 0;
3327
3328                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3329                                       "igt/smoke:%d", id);
3330                 if (IS_ERR(tsk[id])) {
3331                         err = PTR_ERR(tsk[id]);
3332                         break;
3333                 }
3334                 get_task_struct(tsk[id]);
3335         }
3336
3337         yield(); /* start all threads before we kthread_stop() */
3338
3339         count = 0;
3340         for_each_engine(engine, smoke->gt, id) {
3341                 int status;
3342
3343                 if (IS_ERR_OR_NULL(tsk[id]))
3344                         continue;
3345
3346                 status = kthread_stop(tsk[id]);
3347                 if (status && !err)
3348                         err = status;
3349
3350                 count += arg[id].count;
3351
3352                 put_task_struct(tsk[id]);
3353         }
3354
3355         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3356                 count, flags,
3357                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3358         return 0;
3359 }
3360
3361 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3362 {
3363         enum intel_engine_id id;
3364         IGT_TIMEOUT(end_time);
3365         unsigned long count;
3366
3367         count = 0;
3368         do {
3369                 for_each_engine(smoke->engine, smoke->gt, id) {
3370                         struct i915_gem_context *ctx = smoke_context(smoke);
3371                         int err;
3372
3373                         err = smoke_submit(smoke,
3374                                            ctx, random_priority(&smoke->prng),
3375                                            flags & BATCH ? smoke->batch : NULL);
3376                         if (err)
3377                                 return err;
3378
3379                         count++;
3380                 }
3381         } while (!__igt_timeout(end_time, NULL));
3382
3383         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3384                 count, flags,
3385                 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3386         return 0;
3387 }
3388
3389 static int live_preempt_smoke(void *arg)
3390 {
3391         struct preempt_smoke smoke = {
3392                 .gt = arg,
3393                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3394                 .ncontext = 1024,
3395         };
3396         const unsigned int phase[] = { 0, BATCH };
3397         struct igt_live_test t;
3398         int err = -ENOMEM;
3399         u32 *cs;
3400         int n;
3401
3402         if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3403                 return 0;
3404
3405         smoke.contexts = kmalloc_array(smoke.ncontext,
3406                                        sizeof(*smoke.contexts),
3407                                        GFP_KERNEL);
3408         if (!smoke.contexts)
3409                 return -ENOMEM;
3410
3411         smoke.batch =
3412                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3413         if (IS_ERR(smoke.batch)) {
3414                 err = PTR_ERR(smoke.batch);
3415                 goto err_free;
3416         }
3417
3418         cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3419         if (IS_ERR(cs)) {
3420                 err = PTR_ERR(cs);
3421                 goto err_batch;
3422         }
3423         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3424                 cs[n] = MI_ARB_CHECK;
3425         cs[n] = MI_BATCH_BUFFER_END;
3426         i915_gem_object_flush_map(smoke.batch);
3427         i915_gem_object_unpin_map(smoke.batch);
3428
3429         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3430                 err = -EIO;
3431                 goto err_batch;
3432         }
3433
3434         for (n = 0; n < smoke.ncontext; n++) {
3435                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3436                 if (!smoke.contexts[n])
3437                         goto err_ctx;
3438         }
3439
3440         for (n = 0; n < ARRAY_SIZE(phase); n++) {
3441                 err = smoke_crescendo(&smoke, phase[n]);
3442                 if (err)
3443                         goto err_ctx;
3444
3445                 err = smoke_random(&smoke, phase[n]);
3446                 if (err)
3447                         goto err_ctx;
3448         }
3449
3450 err_ctx:
3451         if (igt_live_test_end(&t))
3452                 err = -EIO;
3453
3454         for (n = 0; n < smoke.ncontext; n++) {
3455                 if (!smoke.contexts[n])
3456                         break;
3457                 kernel_context_close(smoke.contexts[n]);
3458         }
3459
3460 err_batch:
3461         i915_gem_object_put(smoke.batch);
3462 err_free:
3463         kfree(smoke.contexts);
3464
3465         return err;
3466 }
3467
3468 static int nop_virtual_engine(struct intel_gt *gt,
3469                               struct intel_engine_cs **siblings,
3470                               unsigned int nsibling,
3471                               unsigned int nctx,
3472                               unsigned int flags)
3473 #define CHAIN BIT(0)
3474 {
3475         IGT_TIMEOUT(end_time);
3476         struct i915_request *request[16] = {};
3477         struct intel_context *ve[16];
3478         unsigned long n, prime, nc;
3479         struct igt_live_test t;
3480         ktime_t times[2] = {};
3481         int err;
3482
3483         GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3484
3485         for (n = 0; n < nctx; n++) {
3486                 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3487                 if (IS_ERR(ve[n])) {
3488                         err = PTR_ERR(ve[n]);
3489                         nctx = n;
3490                         goto out;
3491                 }
3492
3493                 err = intel_context_pin(ve[n]);
3494                 if (err) {
3495                         intel_context_put(ve[n]);
3496                         nctx = n;
3497                         goto out;
3498                 }
3499         }
3500
3501         err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3502         if (err)
3503                 goto out;
3504
3505         for_each_prime_number_from(prime, 1, 8192) {
3506                 times[1] = ktime_get_raw();
3507
3508                 if (flags & CHAIN) {
3509                         for (nc = 0; nc < nctx; nc++) {
3510                                 for (n = 0; n < prime; n++) {
3511                                         struct i915_request *rq;
3512
3513                                         rq = i915_request_create(ve[nc]);
3514                                         if (IS_ERR(rq)) {
3515                                                 err = PTR_ERR(rq);
3516                                                 goto out;
3517                                         }
3518
3519                                         if (request[nc])
3520                                                 i915_request_put(request[nc]);
3521                                         request[nc] = i915_request_get(rq);
3522                                         i915_request_add(rq);
3523                                 }
3524                         }
3525                 } else {
3526                         for (n = 0; n < prime; n++) {
3527                                 for (nc = 0; nc < nctx; nc++) {
3528                                         struct i915_request *rq;
3529
3530                                         rq = i915_request_create(ve[nc]);
3531                                         if (IS_ERR(rq)) {
3532                                                 err = PTR_ERR(rq);
3533                                                 goto out;
3534                                         }
3535
3536                                         if (request[nc])
3537                                                 i915_request_put(request[nc]);
3538                                         request[nc] = i915_request_get(rq);
3539                                         i915_request_add(rq);
3540                                 }
3541                         }
3542                 }
3543
3544                 for (nc = 0; nc < nctx; nc++) {
3545                         if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3546                                 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3547                                        __func__, ve[0]->engine->name,
3548                                        request[nc]->fence.context,
3549                                        request[nc]->fence.seqno);
3550
3551                                 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3552                                           __func__, ve[0]->engine->name,
3553                                           request[nc]->fence.context,
3554                                           request[nc]->fence.seqno);
3555                                 GEM_TRACE_DUMP();
3556                                 intel_gt_set_wedged(gt);
3557                                 break;
3558                         }
3559                 }
3560
3561                 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3562                 if (prime == 1)
3563                         times[0] = times[1];
3564
3565                 for (nc = 0; nc < nctx; nc++) {
3566                         i915_request_put(request[nc]);
3567                         request[nc] = NULL;
3568                 }
3569
3570                 if (__igt_timeout(end_time, NULL))
3571                         break;
3572         }
3573
3574         err = igt_live_test_end(&t);
3575         if (err)
3576                 goto out;
3577
3578         pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3579                 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3580                 prime, div64_u64(ktime_to_ns(times[1]), prime));
3581
3582 out:
3583         if (igt_flush_test(gt->i915))
3584                 err = -EIO;
3585
3586         for (nc = 0; nc < nctx; nc++) {
3587                 i915_request_put(request[nc]);
3588                 intel_context_unpin(ve[nc]);
3589                 intel_context_put(ve[nc]);
3590         }
3591         return err;
3592 }
3593
3594 static int live_virtual_engine(void *arg)
3595 {
3596         struct intel_gt *gt = arg;
3597         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3598         struct intel_engine_cs *engine;
3599         enum intel_engine_id id;
3600         unsigned int class, inst;
3601         int err;
3602
3603         if (intel_uc_uses_guc_submission(&gt->uc))
3604                 return 0;
3605
3606         for_each_engine(engine, gt, id) {
3607                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3608                 if (err) {
3609                         pr_err("Failed to wrap engine %s: err=%d\n",
3610                                engine->name, err);
3611                         return err;
3612                 }
3613         }
3614
3615         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3616                 int nsibling, n;
3617
3618                 nsibling = 0;
3619                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3620                         if (!gt->engine_class[class][inst])
3621                                 continue;
3622
3623                         siblings[nsibling++] = gt->engine_class[class][inst];
3624                 }
3625                 if (nsibling < 2)
3626                         continue;
3627
3628                 for (n = 1; n <= nsibling + 1; n++) {
3629                         err = nop_virtual_engine(gt, siblings, nsibling,
3630                                                  n, 0);
3631                         if (err)
3632                                 return err;
3633                 }
3634
3635                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3636                 if (err)
3637                         return err;
3638         }
3639
3640         return 0;
3641 }
3642
3643 static int mask_virtual_engine(struct intel_gt *gt,
3644                                struct intel_engine_cs **siblings,
3645                                unsigned int nsibling)
3646 {
3647         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3648         struct intel_context *ve;
3649         struct igt_live_test t;
3650         unsigned int n;
3651         int err;
3652
3653         /*
3654          * Check that by setting the execution mask on a request, we can
3655          * restrict it to our desired engine within the virtual engine.
3656          */
3657
3658         ve = intel_execlists_create_virtual(siblings, nsibling);
3659         if (IS_ERR(ve)) {
3660                 err = PTR_ERR(ve);
3661                 goto out_close;
3662         }
3663
3664         err = intel_context_pin(ve);
3665         if (err)
3666                 goto out_put;
3667
3668         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3669         if (err)
3670                 goto out_unpin;
3671
3672         for (n = 0; n < nsibling; n++) {
3673                 request[n] = i915_request_create(ve);
3674                 if (IS_ERR(request[n])) {
3675                         err = PTR_ERR(request[n]);
3676                         nsibling = n;
3677                         goto out;
3678                 }
3679
3680                 /* Reverse order as it's more likely to be unnatural */
3681                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3682
3683                 i915_request_get(request[n]);
3684                 i915_request_add(request[n]);
3685         }
3686
3687         for (n = 0; n < nsibling; n++) {
3688                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3689                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
3690                                __func__, ve->engine->name,
3691                                request[n]->fence.context,
3692                                request[n]->fence.seqno);
3693
3694                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3695                                   __func__, ve->engine->name,
3696                                   request[n]->fence.context,
3697                                   request[n]->fence.seqno);
3698                         GEM_TRACE_DUMP();
3699                         intel_gt_set_wedged(gt);
3700                         err = -EIO;
3701                         goto out;
3702                 }
3703
3704                 if (request[n]->engine != siblings[nsibling - n - 1]) {
3705                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3706                                request[n]->engine->name,
3707                                siblings[nsibling - n - 1]->name);
3708                         err = -EINVAL;
3709                         goto out;
3710                 }
3711         }
3712
3713         err = igt_live_test_end(&t);
3714 out:
3715         if (igt_flush_test(gt->i915))
3716                 err = -EIO;
3717
3718         for (n = 0; n < nsibling; n++)
3719                 i915_request_put(request[n]);
3720
3721 out_unpin:
3722         intel_context_unpin(ve);
3723 out_put:
3724         intel_context_put(ve);
3725 out_close:
3726         return err;
3727 }
3728
3729 static int live_virtual_mask(void *arg)
3730 {
3731         struct intel_gt *gt = arg;
3732         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3733         unsigned int class, inst;
3734         int err;
3735
3736         if (intel_uc_uses_guc_submission(&gt->uc))
3737                 return 0;
3738
3739         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3740                 unsigned int nsibling;
3741
3742                 nsibling = 0;
3743                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3744                         if (!gt->engine_class[class][inst])
3745                                 break;
3746
3747                         siblings[nsibling++] = gt->engine_class[class][inst];
3748                 }
3749                 if (nsibling < 2)
3750                         continue;
3751
3752                 err = mask_virtual_engine(gt, siblings, nsibling);
3753                 if (err)
3754                         return err;
3755         }
3756
3757         return 0;
3758 }
3759
3760 static int preserved_virtual_engine(struct intel_gt *gt,
3761                                     struct intel_engine_cs **siblings,
3762                                     unsigned int nsibling)
3763 {
3764         struct i915_request *last = NULL;
3765         struct intel_context *ve;
3766         struct i915_vma *scratch;
3767         struct igt_live_test t;
3768         unsigned int n;
3769         int err = 0;
3770         u32 *cs;
3771
3772         scratch = create_scratch(siblings[0]->gt);
3773         if (IS_ERR(scratch))
3774                 return PTR_ERR(scratch);
3775
3776         err = i915_vma_sync(scratch);
3777         if (err)
3778                 goto out_scratch;
3779
3780         ve = intel_execlists_create_virtual(siblings, nsibling);
3781         if (IS_ERR(ve)) {
3782                 err = PTR_ERR(ve);
3783                 goto out_scratch;
3784         }
3785
3786         err = intel_context_pin(ve);
3787         if (err)
3788                 goto out_put;
3789
3790         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3791         if (err)
3792                 goto out_unpin;
3793
3794         for (n = 0; n < NUM_GPR_DW; n++) {
3795                 struct intel_engine_cs *engine = siblings[n % nsibling];
3796                 struct i915_request *rq;
3797
3798                 rq = i915_request_create(ve);
3799                 if (IS_ERR(rq)) {
3800                         err = PTR_ERR(rq);
3801                         goto out_end;
3802                 }
3803
3804                 i915_request_put(last);
3805                 last = i915_request_get(rq);
3806
3807                 cs = intel_ring_begin(rq, 8);
3808                 if (IS_ERR(cs)) {
3809                         i915_request_add(rq);
3810                         err = PTR_ERR(cs);
3811                         goto out_end;
3812                 }
3813
3814                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3815                 *cs++ = CS_GPR(engine, n);
3816                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3817                 *cs++ = 0;
3818
3819                 *cs++ = MI_LOAD_REGISTER_IMM(1);
3820                 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3821                 *cs++ = n + 1;
3822
3823                 *cs++ = MI_NOOP;
3824                 intel_ring_advance(rq, cs);
3825
3826                 /* Restrict this request to run on a particular engine */
3827                 rq->execution_mask = engine->mask;
3828                 i915_request_add(rq);
3829         }
3830
3831         if (i915_request_wait(last, 0, HZ / 5) < 0) {
3832                 err = -ETIME;
3833                 goto out_end;
3834         }
3835
3836         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3837         if (IS_ERR(cs)) {
3838                 err = PTR_ERR(cs);
3839                 goto out_end;
3840         }
3841
3842         for (n = 0; n < NUM_GPR_DW; n++) {
3843                 if (cs[n] != n) {
3844                         pr_err("Incorrect value[%d] found for GPR[%d]\n",
3845                                cs[n], n);
3846                         err = -EINVAL;
3847                         break;
3848                 }
3849         }
3850
3851         i915_gem_object_unpin_map(scratch->obj);
3852
3853 out_end:
3854         if (igt_live_test_end(&t))
3855                 err = -EIO;
3856         i915_request_put(last);
3857 out_unpin:
3858         intel_context_unpin(ve);
3859 out_put:
3860         intel_context_put(ve);
3861 out_scratch:
3862         i915_vma_unpin_and_release(&scratch, 0);
3863         return err;
3864 }
3865
3866 static int live_virtual_preserved(void *arg)
3867 {
3868         struct intel_gt *gt = arg;
3869         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3870         unsigned int class, inst;
3871
3872         /*
3873          * Check that the context image retains non-privileged (user) registers
3874          * from one engine to the next. For this we check that the CS_GPR
3875          * are preserved.
3876          */
3877
3878         if (intel_uc_uses_guc_submission(&gt->uc))
3879                 return 0;
3880
3881         /* As we use CS_GPR we cannot run before they existed on all engines. */
3882         if (INTEL_GEN(gt->i915) < 9)
3883                 return 0;
3884
3885         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3886                 int nsibling, err;
3887
3888                 nsibling = 0;
3889                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3890                         if (!gt->engine_class[class][inst])
3891                                 continue;
3892
3893                         siblings[nsibling++] = gt->engine_class[class][inst];
3894                 }
3895                 if (nsibling < 2)
3896                         continue;
3897
3898                 err = preserved_virtual_engine(gt, siblings, nsibling);
3899                 if (err)
3900                         return err;
3901         }
3902
3903         return 0;
3904 }
3905
3906 static int bond_virtual_engine(struct intel_gt *gt,
3907                                unsigned int class,
3908                                struct intel_engine_cs **siblings,
3909                                unsigned int nsibling,
3910                                unsigned int flags)
3911 #define BOND_SCHEDULE BIT(0)
3912 {
3913         struct intel_engine_cs *master;
3914         struct i915_request *rq[16];
3915         enum intel_engine_id id;
3916         struct igt_spinner spin;
3917         unsigned long n;
3918         int err;
3919
3920         /*
3921          * A set of bonded requests is intended to be run concurrently
3922          * across a number of engines. We use one request per-engine
3923          * and a magic fence to schedule each of the bonded requests
3924          * at the same time. A consequence of our current scheduler is that
3925          * we only move requests to the HW ready queue when the request
3926          * becomes ready, that is when all of its prerequisite fences have
3927          * been signaled. As one of those fences is the master submit fence,
3928          * there is a delay on all secondary fences as the HW may be
3929          * currently busy. Equally, as all the requests are independent,
3930          * they may have other fences that delay individual request
3931          * submission to HW. Ergo, we do not guarantee that all requests are
3932          * immediately submitted to HW at the same time, just that if the
3933          * rules are abided by, they are ready at the same time as the
3934          * first is submitted. Userspace can embed semaphores in its batch
3935          * to ensure parallel execution of its phases as it requires.
3936          * Though naturally it gets requested that perhaps the scheduler should
3937          * take care of parallel execution, even across preemption events on
3938          * different HW. (The proper answer is of course "lalalala".)
3939          *
3940          * With the submit-fence, we have identified three possible phases
3941          * of synchronisation depending on the master fence: queued (not
3942          * ready), executing, and signaled. The first two are quite simple
3943          * and checked below. However, the signaled master fence handling is
3944          * contentious. Currently we do not distinguish between a signaled
3945          * fence and an expired fence, as once signaled it does not convey
3946          * any information about the previous execution. It may even be freed
3947          * and hence checking later it may not exist at all. Ergo we currently
3948          * do not apply the bonding constraint for an already signaled fence,
3949          * as our expectation is that it should not constrain the secondaries
3950          * and is outside of the scope of the bonded request API (i.e. all
3951          * userspace requests are meant to be running in parallel). As
3952          * it imposes no constraint, and is effectively a no-op, we do not
3953          * check below as normal execution flows are checked extensively above.
3954          *
3955          * XXX Is the degenerate handling of signaled submit fences the
3956          * expected behaviour for userpace?
3957          */
3958
3959         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3960
3961         if (igt_spinner_init(&spin, gt))
3962                 return -ENOMEM;
3963
3964         err = 0;
3965         rq[0] = ERR_PTR(-ENOMEM);
3966         for_each_engine(master, gt, id) {
3967                 struct i915_sw_fence fence = {};
3968                 struct intel_context *ce;
3969
3970                 if (master->class == class)
3971                         continue;
3972
3973                 ce = intel_context_create(master);
3974                 if (IS_ERR(ce)) {
3975                         err = PTR_ERR(ce);
3976                         goto out;
3977                 }
3978
3979                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3980
3981                 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3982                 intel_context_put(ce);
3983                 if (IS_ERR(rq[0])) {
3984                         err = PTR_ERR(rq[0]);
3985                         goto out;
3986                 }
3987                 i915_request_get(rq[0]);
3988
3989                 if (flags & BOND_SCHEDULE) {
3990                         onstack_fence_init(&fence);
3991                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3992                                                                &fence,
3993                                                                GFP_KERNEL);
3994                 }
3995
3996                 i915_request_add(rq[0]);
3997                 if (err < 0)
3998                         goto out;
3999
4000                 if (!(flags & BOND_SCHEDULE) &&
4001                     !igt_wait_for_spinner(&spin, rq[0])) {
4002                         err = -EIO;
4003                         goto out;
4004                 }
4005
4006                 for (n = 0; n < nsibling; n++) {
4007                         struct intel_context *ve;
4008
4009                         ve = intel_execlists_create_virtual(siblings, nsibling);
4010                         if (IS_ERR(ve)) {
4011                                 err = PTR_ERR(ve);
4012                                 onstack_fence_fini(&fence);
4013                                 goto out;
4014                         }
4015
4016                         err = intel_virtual_engine_attach_bond(ve->engine,
4017                                                                master,
4018                                                                siblings[n]);
4019                         if (err) {
4020                                 intel_context_put(ve);
4021                                 onstack_fence_fini(&fence);
4022                                 goto out;
4023                         }
4024
4025                         err = intel_context_pin(ve);
4026                         intel_context_put(ve);
4027                         if (err) {
4028                                 onstack_fence_fini(&fence);
4029                                 goto out;
4030                         }
4031
4032                         rq[n + 1] = i915_request_create(ve);
4033                         intel_context_unpin(ve);
4034                         if (IS_ERR(rq[n + 1])) {
4035                                 err = PTR_ERR(rq[n + 1]);
4036                                 onstack_fence_fini(&fence);
4037                                 goto out;
4038                         }
4039                         i915_request_get(rq[n + 1]);
4040
4041                         err = i915_request_await_execution(rq[n + 1],
4042                                                            &rq[0]->fence,
4043                                                            ve->engine->bond_execute);
4044                         i915_request_add(rq[n + 1]);
4045                         if (err < 0) {
4046                                 onstack_fence_fini(&fence);
4047                                 goto out;
4048                         }
4049                 }
4050                 onstack_fence_fini(&fence);
4051                 intel_engine_flush_submission(master);
4052                 igt_spinner_end(&spin);
4053
4054                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4055                         pr_err("Master request did not execute (on %s)!\n",
4056                                rq[0]->engine->name);
4057                         err = -EIO;
4058                         goto out;
4059                 }
4060
4061                 for (n = 0; n < nsibling; n++) {
4062                         if (i915_request_wait(rq[n + 1], 0,
4063                                               MAX_SCHEDULE_TIMEOUT) < 0) {
4064                                 err = -EIO;
4065                                 goto out;
4066                         }
4067
4068                         if (rq[n + 1]->engine != siblings[n]) {
4069                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4070                                        siblings[n]->name,
4071                                        rq[n + 1]->engine->name,
4072                                        rq[0]->engine->name);
4073                                 err = -EINVAL;
4074                                 goto out;
4075                         }
4076                 }
4077
4078                 for (n = 0; !IS_ERR(rq[n]); n++)
4079                         i915_request_put(rq[n]);
4080                 rq[0] = ERR_PTR(-ENOMEM);
4081         }
4082
4083 out:
4084         for (n = 0; !IS_ERR(rq[n]); n++)
4085                 i915_request_put(rq[n]);
4086         if (igt_flush_test(gt->i915))
4087                 err = -EIO;
4088
4089         igt_spinner_fini(&spin);
4090         return err;
4091 }
4092
4093 static int live_virtual_bond(void *arg)
4094 {
4095         static const struct phase {
4096                 const char *name;
4097                 unsigned int flags;
4098         } phases[] = {
4099                 { "", 0 },
4100                 { "schedule", BOND_SCHEDULE },
4101                 { },
4102         };
4103         struct intel_gt *gt = arg;
4104         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4105         unsigned int class, inst;
4106         int err;
4107
4108         if (intel_uc_uses_guc_submission(&gt->uc))
4109                 return 0;
4110
4111         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4112                 const struct phase *p;
4113                 int nsibling;
4114
4115                 nsibling = 0;
4116                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4117                         if (!gt->engine_class[class][inst])
4118                                 break;
4119
4120                         GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
4121                         siblings[nsibling++] = gt->engine_class[class][inst];
4122                 }
4123                 if (nsibling < 2)
4124                         continue;
4125
4126                 for (p = phases; p->name; p++) {
4127                         err = bond_virtual_engine(gt,
4128                                                   class, siblings, nsibling,
4129                                                   p->flags);
4130                         if (err) {
4131                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4132                                        __func__, p->name, class, nsibling, err);
4133                                 return err;
4134                         }
4135                 }
4136         }
4137
4138         return 0;
4139 }
4140
4141 static int reset_virtual_engine(struct intel_gt *gt,
4142                                 struct intel_engine_cs **siblings,
4143                                 unsigned int nsibling)
4144 {
4145         struct intel_engine_cs *engine;
4146         struct intel_context *ve;
4147         struct igt_spinner spin;
4148         struct i915_request *rq;
4149         unsigned int n;
4150         int err = 0;
4151
4152         /*
4153          * In order to support offline error capture for fast preempt reset,
4154          * we need to decouple the guilty request and ensure that it and its
4155          * descendents are not executed while the capture is in progress.
4156          */
4157
4158         if (igt_spinner_init(&spin, gt))
4159                 return -ENOMEM;
4160
4161         ve = intel_execlists_create_virtual(siblings, nsibling);
4162         if (IS_ERR(ve)) {
4163                 err = PTR_ERR(ve);
4164                 goto out_spin;
4165         }
4166
4167         for (n = 0; n < nsibling; n++)
4168                 engine_heartbeat_disable(siblings[n]);
4169
4170         rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4171         if (IS_ERR(rq)) {
4172                 err = PTR_ERR(rq);
4173                 goto out_heartbeat;
4174         }
4175         i915_request_add(rq);
4176
4177         if (!igt_wait_for_spinner(&spin, rq)) {
4178                 intel_gt_set_wedged(gt);
4179                 err = -ETIME;
4180                 goto out_heartbeat;
4181         }
4182
4183         engine = rq->engine;
4184         GEM_BUG_ON(engine == ve->engine);
4185
4186         /* Take ownership of the reset and tasklet */
4187         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4188                              &gt->reset.flags)) {
4189                 intel_gt_set_wedged(gt);
4190                 err = -EBUSY;
4191                 goto out_heartbeat;
4192         }
4193         tasklet_disable(&engine->execlists.tasklet);
4194
4195         engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4196         GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4197
4198         /* Fake a preemption event; failed of course */
4199         spin_lock_irq(&engine->active.lock);
4200         __unwind_incomplete_requests(engine);
4201         spin_unlock_irq(&engine->active.lock);
4202         GEM_BUG_ON(rq->engine != ve->engine);
4203
4204         /* Reset the engine while keeping our active request on hold */
4205         execlists_hold(engine, rq);
4206         GEM_BUG_ON(!i915_request_on_hold(rq));
4207
4208         intel_engine_reset(engine, NULL);
4209         GEM_BUG_ON(rq->fence.error != -EIO);
4210
4211         /* Release our grasp on the engine, letting CS flow again */
4212         tasklet_enable(&engine->execlists.tasklet);
4213         clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4214
4215         /* Check that we do not resubmit the held request */
4216         i915_request_get(rq);
4217         if (!i915_request_wait(rq, 0, HZ / 5)) {
4218                 pr_err("%s: on hold request completed!\n",
4219                        engine->name);
4220                 intel_gt_set_wedged(gt);
4221                 err = -EIO;
4222                 goto out_rq;
4223         }
4224         GEM_BUG_ON(!i915_request_on_hold(rq));
4225
4226         /* But is resubmitted on release */
4227         execlists_unhold(engine, rq);
4228         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4229                 pr_err("%s: held request did not complete!\n",
4230                        engine->name);
4231                 intel_gt_set_wedged(gt);
4232                 err = -ETIME;
4233         }
4234
4235 out_rq:
4236         i915_request_put(rq);
4237 out_heartbeat:
4238         for (n = 0; n < nsibling; n++)
4239                 engine_heartbeat_enable(siblings[n]);
4240
4241         intel_context_put(ve);
4242 out_spin:
4243         igt_spinner_fini(&spin);
4244         return err;
4245 }
4246
4247 static int live_virtual_reset(void *arg)
4248 {
4249         struct intel_gt *gt = arg;
4250         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4251         unsigned int class, inst;
4252
4253         /*
4254          * Check that we handle a reset event within a virtual engine.
4255          * Only the physical engine is reset, but we have to check the flow
4256          * of the virtual requests around the reset, and make sure it is not
4257          * forgotten.
4258          */
4259
4260         if (intel_uc_uses_guc_submission(&gt->uc))
4261                 return 0;
4262
4263         if (!intel_has_reset_engine(gt))
4264                 return 0;
4265
4266         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4267                 int nsibling, err;
4268
4269                 nsibling = 0;
4270                 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4271                         if (!gt->engine_class[class][inst])
4272                                 continue;
4273
4274                         siblings[nsibling++] = gt->engine_class[class][inst];
4275                 }
4276                 if (nsibling < 2)
4277                         continue;
4278
4279                 err = reset_virtual_engine(gt, siblings, nsibling);
4280                 if (err)
4281                         return err;
4282         }
4283
4284         return 0;
4285 }
4286
4287 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4288 {
4289         static const struct i915_subtest tests[] = {
4290                 SUBTEST(live_sanitycheck),
4291                 SUBTEST(live_unlite_switch),
4292                 SUBTEST(live_unlite_preempt),
4293                 SUBTEST(live_pin_rewind),
4294                 SUBTEST(live_hold_reset),
4295                 SUBTEST(live_error_interrupt),
4296                 SUBTEST(live_timeslice_preempt),
4297                 SUBTEST(live_timeslice_rewind),
4298                 SUBTEST(live_timeslice_queue),
4299                 SUBTEST(live_busywait_preempt),
4300                 SUBTEST(live_preempt),
4301                 SUBTEST(live_late_preempt),
4302                 SUBTEST(live_nopreempt),
4303                 SUBTEST(live_preempt_cancel),
4304                 SUBTEST(live_suppress_self_preempt),
4305                 SUBTEST(live_suppress_wait_preempt),
4306                 SUBTEST(live_chain_preempt),
4307                 SUBTEST(live_preempt_gang),
4308                 SUBTEST(live_preempt_timeout),
4309                 SUBTEST(live_preempt_user),
4310                 SUBTEST(live_preempt_smoke),
4311                 SUBTEST(live_virtual_engine),
4312                 SUBTEST(live_virtual_mask),
4313                 SUBTEST(live_virtual_preserved),
4314                 SUBTEST(live_virtual_bond),
4315                 SUBTEST(live_virtual_reset),
4316         };
4317
4318         if (!HAS_EXECLISTS(i915))
4319                 return 0;
4320
4321         if (intel_gt_is_wedged(&i915->gt))
4322                 return 0;
4323
4324         return intel_gt_live_subtests(tests, &i915->gt);
4325 }
4326
4327 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4328 {
4329         const u32 offset =
4330                 i915_ggtt_offset(ce->engine->status_page.vma) +
4331                 offset_in_page(slot);
4332         struct i915_request *rq;
4333         u32 *cs;
4334
4335         rq = intel_context_create_request(ce);
4336         if (IS_ERR(rq))
4337                 return PTR_ERR(rq);
4338
4339         cs = intel_ring_begin(rq, 4);
4340         if (IS_ERR(cs)) {
4341                 i915_request_add(rq);
4342                 return PTR_ERR(cs);
4343         }
4344
4345         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4346         *cs++ = offset;
4347         *cs++ = 0;
4348         *cs++ = 1;
4349
4350         intel_ring_advance(rq, cs);
4351
4352         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4353         i915_request_add(rq);
4354         return 0;
4355 }
4356
4357 static int context_flush(struct intel_context *ce, long timeout)
4358 {
4359         struct i915_request *rq;
4360         struct dma_fence *fence;
4361         int err = 0;
4362
4363         rq = intel_engine_create_kernel_request(ce->engine);
4364         if (IS_ERR(rq))
4365                 return PTR_ERR(rq);
4366
4367         fence = i915_active_fence_get(&ce->timeline->last_request);
4368         if (fence) {
4369                 i915_request_await_dma_fence(rq, fence);
4370                 dma_fence_put(fence);
4371         }
4372
4373         rq = i915_request_get(rq);
4374         i915_request_add(rq);
4375         if (i915_request_wait(rq, 0, timeout) < 0)
4376                 err = -ETIME;
4377         i915_request_put(rq);
4378
4379         rmb(); /* We know the request is written, make sure all state is too! */
4380         return err;
4381 }
4382
4383 static int live_lrc_layout(void *arg)
4384 {
4385         struct intel_gt *gt = arg;
4386         struct intel_engine_cs *engine;
4387         enum intel_engine_id id;
4388         u32 *lrc;
4389         int err;
4390
4391         /*
4392          * Check the registers offsets we use to create the initial reg state
4393          * match the layout saved by HW.
4394          */
4395
4396         lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4397         if (!lrc)
4398                 return -ENOMEM;
4399
4400         err = 0;
4401         for_each_engine(engine, gt, id) {
4402                 u32 *hw;
4403                 int dw;
4404
4405                 if (!engine->default_state)
4406                         continue;
4407
4408                 hw = shmem_pin_map(engine->default_state);
4409                 if (IS_ERR(hw)) {
4410                         err = PTR_ERR(hw);
4411                         break;
4412                 }
4413                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4414
4415                 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4416                                          engine->kernel_context,
4417                                          engine,
4418                                          engine->kernel_context->ring,
4419                                          true);
4420
4421                 dw = 0;
4422                 do {
4423                         u32 lri = hw[dw];
4424
4425                         if (lri == 0) {
4426                                 dw++;
4427                                 continue;
4428                         }
4429
4430                         if (lrc[dw] == 0) {
4431                                 pr_debug("%s: skipped instruction %x at dword %d\n",
4432                                          engine->name, lri, dw);
4433                                 dw++;
4434                                 continue;
4435                         }
4436
4437                         if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4438                                 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4439                                        engine->name, dw, lri);
4440                                 err = -EINVAL;
4441                                 break;
4442                         }
4443
4444                         if (lrc[dw] != lri) {
4445                                 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4446                                        engine->name, dw, lri, lrc[dw]);
4447                                 err = -EINVAL;
4448                                 break;
4449                         }
4450
4451                         lri &= 0x7f;
4452                         lri++;
4453                         dw++;
4454
4455                         while (lri) {
4456                                 if (hw[dw] != lrc[dw]) {
4457                                         pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4458                                                engine->name, dw, hw[dw], lrc[dw]);
4459                                         err = -EINVAL;
4460                                         break;
4461                                 }
4462
4463                                 /*
4464                                  * Skip over the actual register value as we
4465                                  * expect that to differ.
4466                                  */
4467                                 dw += 2;
4468                                 lri -= 2;
4469                         }
4470                 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4471
4472                 if (err) {
4473                         pr_info("%s: HW register image:\n", engine->name);
4474                         igt_hexdump(hw, PAGE_SIZE);
4475
4476                         pr_info("%s: SW register image:\n", engine->name);
4477                         igt_hexdump(lrc, PAGE_SIZE);
4478                 }
4479
4480                 shmem_unpin_map(engine->default_state, hw);
4481                 if (err)
4482                         break;
4483         }
4484
4485         kfree(lrc);
4486         return err;
4487 }
4488
4489 static int find_offset(const u32 *lri, u32 offset)
4490 {
4491         int i;
4492
4493         for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4494                 if (lri[i] == offset)
4495                         return i;
4496
4497         return -1;
4498 }
4499
4500 static int live_lrc_fixed(void *arg)
4501 {
4502         struct intel_gt *gt = arg;
4503         struct intel_engine_cs *engine;
4504         enum intel_engine_id id;
4505         int err = 0;
4506
4507         /*
4508          * Check the assumed register offsets match the actual locations in
4509          * the context image.
4510          */
4511
4512         for_each_engine(engine, gt, id) {
4513                 const struct {
4514                         u32 reg;
4515                         u32 offset;
4516                         const char *name;
4517                 } tbl[] = {
4518                         {
4519                                 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4520                                 CTX_RING_START - 1,
4521                                 "RING_START"
4522                         },
4523                         {
4524                                 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4525                                 CTX_RING_CTL - 1,
4526                                 "RING_CTL"
4527                         },
4528                         {
4529                                 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4530                                 CTX_RING_HEAD - 1,
4531                                 "RING_HEAD"
4532                         },
4533                         {
4534                                 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4535                                 CTX_RING_TAIL - 1,
4536                                 "RING_TAIL"
4537                         },
4538                         {
4539                                 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4540                                 lrc_ring_mi_mode(engine),
4541                                 "RING_MI_MODE"
4542                         },
4543                         {
4544                                 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4545                                 CTX_BB_STATE - 1,
4546                                 "BB_STATE"
4547                         },
4548                         {
4549                                 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4550                                 lrc_ring_wa_bb_per_ctx(engine),
4551                                 "RING_BB_PER_CTX_PTR"
4552                         },
4553                         {
4554                                 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4555                                 lrc_ring_indirect_ptr(engine),
4556                                 "RING_INDIRECT_CTX_PTR"
4557                         },
4558                         {
4559                                 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4560                                 lrc_ring_indirect_offset(engine),
4561                                 "RING_INDIRECT_CTX_OFFSET"
4562                         },
4563                         {
4564                                 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4565                                 CTX_TIMESTAMP - 1,
4566                                 "RING_CTX_TIMESTAMP"
4567                         },
4568                         {
4569                                 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4570                                 lrc_ring_gpr0(engine),
4571                                 "RING_CS_GPR0"
4572                         },
4573                         {
4574                                 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4575                                 lrc_ring_cmd_buf_cctl(engine),
4576                                 "RING_CMD_BUF_CCTL"
4577                         },
4578                         { },
4579                 }, *t;
4580                 u32 *hw;
4581
4582                 if (!engine->default_state)
4583                         continue;
4584
4585                 hw = shmem_pin_map(engine->default_state);
4586                 if (IS_ERR(hw)) {
4587                         err = PTR_ERR(hw);
4588                         break;
4589                 }
4590                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4591
4592                 for (t = tbl; t->name; t++) {
4593                         int dw = find_offset(hw, t->reg);
4594
4595                         if (dw != t->offset) {
4596                                 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4597                                        engine->name,
4598                                        t->name,
4599                                        t->reg,
4600                                        dw,
4601                                        t->offset);
4602                                 err = -EINVAL;
4603                         }
4604                 }
4605
4606                 shmem_unpin_map(engine->default_state, hw);
4607         }
4608
4609         return err;
4610 }
4611
4612 static int __live_lrc_state(struct intel_engine_cs *engine,
4613                             struct i915_vma *scratch)
4614 {
4615         struct intel_context *ce;
4616         struct i915_request *rq;
4617         enum {
4618                 RING_START_IDX = 0,
4619                 RING_TAIL_IDX,
4620                 MAX_IDX
4621         };
4622         u32 expected[MAX_IDX];
4623         u32 *cs;
4624         int err;
4625         int n;
4626
4627         ce = intel_context_create(engine);
4628         if (IS_ERR(ce))
4629                 return PTR_ERR(ce);
4630
4631         err = intel_context_pin(ce);
4632         if (err)
4633                 goto err_put;
4634
4635         rq = i915_request_create(ce);
4636         if (IS_ERR(rq)) {
4637                 err = PTR_ERR(rq);
4638                 goto err_unpin;
4639         }
4640
4641         cs = intel_ring_begin(rq, 4 * MAX_IDX);
4642         if (IS_ERR(cs)) {
4643                 err = PTR_ERR(cs);
4644                 i915_request_add(rq);
4645                 goto err_unpin;
4646         }
4647
4648         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4649         *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4650         *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4651         *cs++ = 0;
4652
4653         expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4654
4655         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4656         *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4657         *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4658         *cs++ = 0;
4659
4660         i915_vma_lock(scratch);
4661         err = i915_request_await_object(rq, scratch->obj, true);
4662         if (!err)
4663                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4664         i915_vma_unlock(scratch);
4665
4666         i915_request_get(rq);
4667         i915_request_add(rq);
4668         if (err)
4669                 goto err_rq;
4670
4671         intel_engine_flush_submission(engine);
4672         expected[RING_TAIL_IDX] = ce->ring->tail;
4673
4674         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4675                 err = -ETIME;
4676                 goto err_rq;
4677         }
4678
4679         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4680         if (IS_ERR(cs)) {
4681                 err = PTR_ERR(cs);
4682                 goto err_rq;
4683         }
4684
4685         for (n = 0; n < MAX_IDX; n++) {
4686                 if (cs[n] != expected[n]) {
4687                         pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4688                                engine->name, n, cs[n], expected[n]);
4689                         err = -EINVAL;
4690                         break;
4691                 }
4692         }
4693
4694         i915_gem_object_unpin_map(scratch->obj);
4695
4696 err_rq:
4697         i915_request_put(rq);
4698 err_unpin:
4699         intel_context_unpin(ce);
4700 err_put:
4701         intel_context_put(ce);
4702         return err;
4703 }
4704
4705 static int live_lrc_state(void *arg)
4706 {
4707         struct intel_gt *gt = arg;
4708         struct intel_engine_cs *engine;
4709         struct i915_vma *scratch;
4710         enum intel_engine_id id;
4711         int err = 0;
4712
4713         /*
4714          * Check the live register state matches what we expect for this
4715          * intel_context.
4716          */
4717
4718         scratch = create_scratch(gt);
4719         if (IS_ERR(scratch))
4720                 return PTR_ERR(scratch);
4721
4722         for_each_engine(engine, gt, id) {
4723                 err = __live_lrc_state(engine, scratch);
4724                 if (err)
4725                         break;
4726         }
4727
4728         if (igt_flush_test(gt->i915))
4729                 err = -EIO;
4730
4731         i915_vma_unpin_and_release(&scratch, 0);
4732         return err;
4733 }
4734
4735 static int gpr_make_dirty(struct intel_context *ce)
4736 {
4737         struct i915_request *rq;
4738         u32 *cs;
4739         int n;
4740
4741         rq = intel_context_create_request(ce);
4742         if (IS_ERR(rq))
4743                 return PTR_ERR(rq);
4744
4745         cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4746         if (IS_ERR(cs)) {
4747                 i915_request_add(rq);
4748                 return PTR_ERR(cs);
4749         }
4750
4751         *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4752         for (n = 0; n < NUM_GPR_DW; n++) {
4753                 *cs++ = CS_GPR(ce->engine, n);
4754                 *cs++ = STACK_MAGIC;
4755         }
4756         *cs++ = MI_NOOP;
4757
4758         intel_ring_advance(rq, cs);
4759
4760         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4761         i915_request_add(rq);
4762
4763         return 0;
4764 }
4765
4766 static struct i915_request *
4767 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4768 {
4769         const u32 offset =
4770                 i915_ggtt_offset(ce->engine->status_page.vma) +
4771                 offset_in_page(slot);
4772         struct i915_request *rq;
4773         u32 *cs;
4774         int err;
4775         int n;
4776
4777         rq = intel_context_create_request(ce);
4778         if (IS_ERR(rq))
4779                 return rq;
4780
4781         cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4782         if (IS_ERR(cs)) {
4783                 i915_request_add(rq);
4784                 return ERR_CAST(cs);
4785         }
4786
4787         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4788         *cs++ = MI_NOOP;
4789
4790         *cs++ = MI_SEMAPHORE_WAIT |
4791                 MI_SEMAPHORE_GLOBAL_GTT |
4792                 MI_SEMAPHORE_POLL |
4793                 MI_SEMAPHORE_SAD_NEQ_SDD;
4794         *cs++ = 0;
4795         *cs++ = offset;
4796         *cs++ = 0;
4797
4798         for (n = 0; n < NUM_GPR_DW; n++) {
4799                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4800                 *cs++ = CS_GPR(ce->engine, n);
4801                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4802                 *cs++ = 0;
4803         }
4804
4805         i915_vma_lock(scratch);
4806         err = i915_request_await_object(rq, scratch->obj, true);
4807         if (!err)
4808                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4809         i915_vma_unlock(scratch);
4810
4811         i915_request_get(rq);
4812         i915_request_add(rq);
4813         if (err) {
4814                 i915_request_put(rq);
4815                 rq = ERR_PTR(err);
4816         }
4817
4818         return rq;
4819 }
4820
4821 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4822                           struct i915_vma *scratch,
4823                           bool preempt)
4824 {
4825         u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4826         struct intel_context *ce;
4827         struct i915_request *rq;
4828         u32 *cs;
4829         int err;
4830         int n;
4831
4832         if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4833                 return 0; /* GPR only on rcs0 for gen8 */
4834
4835         err = gpr_make_dirty(engine->kernel_context);
4836         if (err)
4837                 return err;
4838
4839         ce = intel_context_create(engine);
4840         if (IS_ERR(ce))
4841                 return PTR_ERR(ce);
4842
4843         rq = __gpr_read(ce, scratch, slot);
4844         if (IS_ERR(rq)) {
4845                 err = PTR_ERR(rq);
4846                 goto err_put;
4847         }
4848
4849         err = wait_for_submit(engine, rq, HZ / 2);
4850         if (err)
4851                 goto err_rq;
4852
4853         if (preempt) {
4854                 err = gpr_make_dirty(engine->kernel_context);
4855                 if (err)
4856                         goto err_rq;
4857
4858                 err = emit_semaphore_signal(engine->kernel_context, slot);
4859                 if (err)
4860                         goto err_rq;
4861         } else {
4862                 slot[0] = 1;
4863                 wmb();
4864         }
4865
4866         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4867                 err = -ETIME;
4868                 goto err_rq;
4869         }
4870
4871         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4872         if (IS_ERR(cs)) {
4873                 err = PTR_ERR(cs);
4874                 goto err_rq;
4875         }
4876
4877         for (n = 0; n < NUM_GPR_DW; n++) {
4878                 if (cs[n]) {
4879                         pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4880                                engine->name,
4881                                n / 2, n & 1 ? "udw" : "ldw",
4882                                cs[n]);
4883                         err = -EINVAL;
4884                         break;
4885                 }
4886         }
4887
4888         i915_gem_object_unpin_map(scratch->obj);
4889
4890 err_rq:
4891         memset32(&slot[0], -1, 4);
4892         wmb();
4893         i915_request_put(rq);
4894 err_put:
4895         intel_context_put(ce);
4896         return err;
4897 }
4898
4899 static int live_lrc_gpr(void *arg)
4900 {
4901         struct intel_gt *gt = arg;
4902         struct intel_engine_cs *engine;
4903         struct i915_vma *scratch;
4904         enum intel_engine_id id;
4905         int err = 0;
4906
4907         /*
4908          * Check that GPR registers are cleared in new contexts as we need
4909          * to avoid leaking any information from previous contexts.
4910          */
4911
4912         scratch = create_scratch(gt);
4913         if (IS_ERR(scratch))
4914                 return PTR_ERR(scratch);
4915
4916         for_each_engine(engine, gt, id) {
4917                 engine_heartbeat_disable(engine);
4918
4919                 err = __live_lrc_gpr(engine, scratch, false);
4920                 if (err)
4921                         goto err;
4922
4923                 err = __live_lrc_gpr(engine, scratch, true);
4924                 if (err)
4925                         goto err;
4926
4927 err:
4928                 engine_heartbeat_enable(engine);
4929                 if (igt_flush_test(gt->i915))
4930                         err = -EIO;
4931                 if (err)
4932                         break;
4933         }
4934
4935         i915_vma_unpin_and_release(&scratch, 0);
4936         return err;
4937 }
4938
4939 static struct i915_request *
4940 create_timestamp(struct intel_context *ce, void *slot, int idx)
4941 {
4942         const u32 offset =
4943                 i915_ggtt_offset(ce->engine->status_page.vma) +
4944                 offset_in_page(slot);
4945         struct i915_request *rq;
4946         u32 *cs;
4947         int err;
4948
4949         rq = intel_context_create_request(ce);
4950         if (IS_ERR(rq))
4951                 return rq;
4952
4953         cs = intel_ring_begin(rq, 10);
4954         if (IS_ERR(cs)) {
4955                 err = PTR_ERR(cs);
4956                 goto err;
4957         }
4958
4959         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4960         *cs++ = MI_NOOP;
4961
4962         *cs++ = MI_SEMAPHORE_WAIT |
4963                 MI_SEMAPHORE_GLOBAL_GTT |
4964                 MI_SEMAPHORE_POLL |
4965                 MI_SEMAPHORE_SAD_NEQ_SDD;
4966         *cs++ = 0;
4967         *cs++ = offset;
4968         *cs++ = 0;
4969
4970         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4971         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4972         *cs++ = offset + idx * sizeof(u32);
4973         *cs++ = 0;
4974
4975         intel_ring_advance(rq, cs);
4976
4977         rq->sched.attr.priority = I915_PRIORITY_MASK;
4978         err = 0;
4979 err:
4980         i915_request_get(rq);
4981         i915_request_add(rq);
4982         if (err) {
4983                 i915_request_put(rq);
4984                 return ERR_PTR(err);
4985         }
4986
4987         return rq;
4988 }
4989
4990 struct lrc_timestamp {
4991         struct intel_engine_cs *engine;
4992         struct intel_context *ce[2];
4993         u32 poison;
4994 };
4995
4996 static bool timestamp_advanced(u32 start, u32 end)
4997 {
4998         return (s32)(end - start) > 0;
4999 }
5000
5001 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5002 {
5003         u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5004         struct i915_request *rq;
5005         u32 timestamp;
5006         int err = 0;
5007
5008         arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5009         rq = create_timestamp(arg->ce[0], slot, 1);
5010         if (IS_ERR(rq))
5011                 return PTR_ERR(rq);
5012
5013         err = wait_for_submit(rq->engine, rq, HZ / 2);
5014         if (err)
5015                 goto err;
5016
5017         if (preempt) {
5018                 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5019                 err = emit_semaphore_signal(arg->ce[1], slot);
5020                 if (err)
5021                         goto err;
5022         } else {
5023                 slot[0] = 1;
5024                 wmb();
5025         }
5026
5027         /* And wait for switch to kernel (to save our context to memory) */
5028         err = context_flush(arg->ce[0], HZ / 2);
5029         if (err)
5030                 goto err;
5031
5032         if (!timestamp_advanced(arg->poison, slot[1])) {
5033                 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5034                        arg->engine->name, preempt ? "preempt" : "simple",
5035                        arg->poison, slot[1]);
5036                 err = -EINVAL;
5037         }
5038
5039         timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5040         if (!timestamp_advanced(slot[1], timestamp)) {
5041                 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5042                        arg->engine->name, preempt ? "preempt" : "simple",
5043                        slot[1], timestamp);
5044                 err = -EINVAL;
5045         }
5046
5047 err:
5048         memset32(slot, -1, 4);
5049         i915_request_put(rq);
5050         return err;
5051 }
5052
5053 static int live_lrc_timestamp(void *arg)
5054 {
5055         struct lrc_timestamp data = {};
5056         struct intel_gt *gt = arg;
5057         enum intel_engine_id id;
5058         const u32 poison[] = {
5059                 0,
5060                 S32_MAX,
5061                 (u32)S32_MAX + 1,
5062                 U32_MAX,
5063         };
5064
5065         /*
5066          * We want to verify that the timestamp is saved and restore across
5067          * context switches and is monotonic.
5068          *
5069          * So we do this with a little bit of LRC poisoning to check various
5070          * boundary conditions, and see what happens if we preempt the context
5071          * with a second request (carrying more poison into the timestamp).
5072          */
5073
5074         for_each_engine(data.engine, gt, id) {
5075                 int i, err = 0;
5076
5077                 engine_heartbeat_disable(data.engine);
5078
5079                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5080                         struct intel_context *tmp;
5081
5082                         tmp = intel_context_create(data.engine);
5083                         if (IS_ERR(tmp)) {
5084                                 err = PTR_ERR(tmp);
5085                                 goto err;
5086                         }
5087
5088                         err = intel_context_pin(tmp);
5089                         if (err) {
5090                                 intel_context_put(tmp);
5091                                 goto err;
5092                         }
5093
5094                         data.ce[i] = tmp;
5095                 }
5096
5097                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5098                         data.poison = poison[i];
5099
5100                         err = __lrc_timestamp(&data, false);
5101                         if (err)
5102                                 break;
5103
5104                         err = __lrc_timestamp(&data, true);
5105                         if (err)
5106                                 break;
5107                 }
5108
5109 err:
5110                 engine_heartbeat_enable(data.engine);
5111                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5112                         if (!data.ce[i])
5113                                 break;
5114
5115                         intel_context_unpin(data.ce[i]);
5116                         intel_context_put(data.ce[i]);
5117                 }
5118
5119                 if (igt_flush_test(gt->i915))
5120                         err = -EIO;
5121                 if (err)
5122                         return err;
5123         }
5124
5125         return 0;
5126 }
5127
5128 static struct i915_vma *
5129 create_user_vma(struct i915_address_space *vm, unsigned long size)
5130 {
5131         struct drm_i915_gem_object *obj;
5132         struct i915_vma *vma;
5133         int err;
5134
5135         obj = i915_gem_object_create_internal(vm->i915, size);
5136         if (IS_ERR(obj))
5137                 return ERR_CAST(obj);
5138
5139         vma = i915_vma_instance(obj, vm, NULL);
5140         if (IS_ERR(vma)) {
5141                 i915_gem_object_put(obj);
5142                 return vma;
5143         }
5144
5145         err = i915_vma_pin(vma, 0, 0, PIN_USER);
5146         if (err) {
5147                 i915_gem_object_put(obj);
5148                 return ERR_PTR(err);
5149         }
5150
5151         return vma;
5152 }
5153
5154 static struct i915_vma *
5155 store_context(struct intel_context *ce, struct i915_vma *scratch)
5156 {
5157         struct i915_vma *batch;
5158         u32 dw, x, *cs, *hw;
5159         u32 *defaults;
5160
5161         batch = create_user_vma(ce->vm, SZ_64K);
5162         if (IS_ERR(batch))
5163                 return batch;
5164
5165         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5166         if (IS_ERR(cs)) {
5167                 i915_vma_put(batch);
5168                 return ERR_CAST(cs);
5169         }
5170
5171         defaults = shmem_pin_map(ce->engine->default_state);
5172         if (!defaults) {
5173                 i915_gem_object_unpin_map(batch->obj);
5174                 i915_vma_put(batch);
5175                 return ERR_PTR(-ENOMEM);
5176         }
5177
5178         x = 0;
5179         dw = 0;
5180         hw = defaults;
5181         hw += LRC_STATE_OFFSET / sizeof(*hw);
5182         do {
5183                 u32 len = hw[dw] & 0x7f;
5184
5185                 if (hw[dw] == 0) {
5186                         dw++;
5187                         continue;
5188                 }
5189
5190                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5191                         dw += len + 2;
5192                         continue;
5193                 }
5194
5195                 dw++;
5196                 len = (len + 1) / 2;
5197                 while (len--) {
5198                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5199                         *cs++ = hw[dw];
5200                         *cs++ = lower_32_bits(scratch->node.start + x);
5201                         *cs++ = upper_32_bits(scratch->node.start + x);
5202
5203                         dw += 2;
5204                         x += 4;
5205                 }
5206         } while (dw < PAGE_SIZE / sizeof(u32) &&
5207                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5208
5209         *cs++ = MI_BATCH_BUFFER_END;
5210
5211         shmem_unpin_map(ce->engine->default_state, defaults);
5212
5213         i915_gem_object_flush_map(batch->obj);
5214         i915_gem_object_unpin_map(batch->obj);
5215
5216         return batch;
5217 }
5218
5219 static int move_to_active(struct i915_request *rq,
5220                           struct i915_vma *vma,
5221                           unsigned int flags)
5222 {
5223         int err;
5224
5225         i915_vma_lock(vma);
5226         err = i915_request_await_object(rq, vma->obj, flags);
5227         if (!err)
5228                 err = i915_vma_move_to_active(vma, rq, flags);
5229         i915_vma_unlock(vma);
5230
5231         return err;
5232 }
5233
5234 static struct i915_request *
5235 record_registers(struct intel_context *ce,
5236                  struct i915_vma *before,
5237                  struct i915_vma *after,
5238                  u32 *sema)
5239 {
5240         struct i915_vma *b_before, *b_after;
5241         struct i915_request *rq;
5242         u32 *cs;
5243         int err;
5244
5245         b_before = store_context(ce, before);
5246         if (IS_ERR(b_before))
5247                 return ERR_CAST(b_before);
5248
5249         b_after = store_context(ce, after);
5250         if (IS_ERR(b_after)) {
5251                 rq = ERR_CAST(b_after);
5252                 goto err_before;
5253         }
5254
5255         rq = intel_context_create_request(ce);
5256         if (IS_ERR(rq))
5257                 goto err_after;
5258
5259         err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5260         if (err)
5261                 goto err_rq;
5262
5263         err = move_to_active(rq, b_before, 0);
5264         if (err)
5265                 goto err_rq;
5266
5267         err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5268         if (err)
5269                 goto err_rq;
5270
5271         err = move_to_active(rq, b_after, 0);
5272         if (err)
5273                 goto err_rq;
5274
5275         cs = intel_ring_begin(rq, 14);
5276         if (IS_ERR(cs)) {
5277                 err = PTR_ERR(cs);
5278                 goto err_rq;
5279         }
5280
5281         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5282         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5283         *cs++ = lower_32_bits(b_before->node.start);
5284         *cs++ = upper_32_bits(b_before->node.start);
5285
5286         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5287         *cs++ = MI_SEMAPHORE_WAIT |
5288                 MI_SEMAPHORE_GLOBAL_GTT |
5289                 MI_SEMAPHORE_POLL |
5290                 MI_SEMAPHORE_SAD_NEQ_SDD;
5291         *cs++ = 0;
5292         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5293                 offset_in_page(sema);
5294         *cs++ = 0;
5295         *cs++ = MI_NOOP;
5296
5297         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5298         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5299         *cs++ = lower_32_bits(b_after->node.start);
5300         *cs++ = upper_32_bits(b_after->node.start);
5301
5302         intel_ring_advance(rq, cs);
5303
5304         WRITE_ONCE(*sema, 0);
5305         i915_request_get(rq);
5306         i915_request_add(rq);
5307 err_after:
5308         i915_vma_put(b_after);
5309 err_before:
5310         i915_vma_put(b_before);
5311         return rq;
5312
5313 err_rq:
5314         i915_request_add(rq);
5315         rq = ERR_PTR(err);
5316         goto err_after;
5317 }
5318
5319 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5320 {
5321         struct i915_vma *batch;
5322         u32 dw, *cs, *hw;
5323         u32 *defaults;
5324
5325         batch = create_user_vma(ce->vm, SZ_64K);
5326         if (IS_ERR(batch))
5327                 return batch;
5328
5329         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5330         if (IS_ERR(cs)) {
5331                 i915_vma_put(batch);
5332                 return ERR_CAST(cs);
5333         }
5334
5335         defaults = shmem_pin_map(ce->engine->default_state);
5336         if (!defaults) {
5337                 i915_gem_object_unpin_map(batch->obj);
5338                 i915_vma_put(batch);
5339                 return ERR_PTR(-ENOMEM);
5340         }
5341
5342         dw = 0;
5343         hw = defaults;
5344         hw += LRC_STATE_OFFSET / sizeof(*hw);
5345         do {
5346                 u32 len = hw[dw] & 0x7f;
5347
5348                 if (hw[dw] == 0) {
5349                         dw++;
5350                         continue;
5351                 }
5352
5353                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5354                         dw += len + 2;
5355                         continue;
5356                 }
5357
5358                 dw++;
5359                 len = (len + 1) / 2;
5360                 *cs++ = MI_LOAD_REGISTER_IMM(len);
5361                 while (len--) {
5362                         *cs++ = hw[dw];
5363                         *cs++ = poison;
5364                         dw += 2;
5365                 }
5366         } while (dw < PAGE_SIZE / sizeof(u32) &&
5367                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5368
5369         *cs++ = MI_BATCH_BUFFER_END;
5370
5371         shmem_unpin_map(ce->engine->default_state, defaults);
5372
5373         i915_gem_object_flush_map(batch->obj);
5374         i915_gem_object_unpin_map(batch->obj);
5375
5376         return batch;
5377 }
5378
5379 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5380 {
5381         struct i915_request *rq;
5382         struct i915_vma *batch;
5383         u32 *cs;
5384         int err;
5385
5386         batch = load_context(ce, poison);
5387         if (IS_ERR(batch))
5388                 return PTR_ERR(batch);
5389
5390         rq = intel_context_create_request(ce);
5391         if (IS_ERR(rq)) {
5392                 err = PTR_ERR(rq);
5393                 goto err_batch;
5394         }
5395
5396         err = move_to_active(rq, batch, 0);
5397         if (err)
5398                 goto err_rq;
5399
5400         cs = intel_ring_begin(rq, 8);
5401         if (IS_ERR(cs)) {
5402                 err = PTR_ERR(cs);
5403                 goto err_rq;
5404         }
5405
5406         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5407         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5408         *cs++ = lower_32_bits(batch->node.start);
5409         *cs++ = upper_32_bits(batch->node.start);
5410
5411         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5412         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5413                 offset_in_page(sema);
5414         *cs++ = 0;
5415         *cs++ = 1;
5416
5417         intel_ring_advance(rq, cs);
5418
5419         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5420 err_rq:
5421         i915_request_add(rq);
5422 err_batch:
5423         i915_vma_put(batch);
5424         return err;
5425 }
5426
5427 static bool is_moving(u32 a, u32 b)
5428 {
5429         return a != b;
5430 }
5431
5432 static int compare_isolation(struct intel_engine_cs *engine,
5433                              struct i915_vma *ref[2],
5434                              struct i915_vma *result[2],
5435                              struct intel_context *ce,
5436                              u32 poison)
5437 {
5438         u32 x, dw, *hw, *lrc;
5439         u32 *A[2], *B[2];
5440         u32 *defaults;
5441         int err = 0;
5442
5443         A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5444         if (IS_ERR(A[0]))
5445                 return PTR_ERR(A[0]);
5446
5447         A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5448         if (IS_ERR(A[1])) {
5449                 err = PTR_ERR(A[1]);
5450                 goto err_A0;
5451         }
5452
5453         B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5454         if (IS_ERR(B[0])) {
5455                 err = PTR_ERR(B[0]);
5456                 goto err_A1;
5457         }
5458
5459         B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5460         if (IS_ERR(B[1])) {
5461                 err = PTR_ERR(B[1]);
5462                 goto err_B0;
5463         }
5464
5465         lrc = i915_gem_object_pin_map(ce->state->obj,
5466                                       i915_coherent_map_type(engine->i915));
5467         if (IS_ERR(lrc)) {
5468                 err = PTR_ERR(lrc);
5469                 goto err_B1;
5470         }
5471         lrc += LRC_STATE_OFFSET / sizeof(*hw);
5472
5473         defaults = shmem_pin_map(ce->engine->default_state);
5474         if (!defaults) {
5475                 err = -ENOMEM;
5476                 goto err_lrc;
5477         }
5478
5479         x = 0;
5480         dw = 0;
5481         hw = defaults;
5482         hw += LRC_STATE_OFFSET / sizeof(*hw);
5483         do {
5484                 u32 len = hw[dw] & 0x7f;
5485
5486                 if (hw[dw] == 0) {
5487                         dw++;
5488                         continue;
5489                 }
5490
5491                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5492                         dw += len + 2;
5493                         continue;
5494                 }
5495
5496                 dw++;
5497                 len = (len + 1) / 2;
5498                 while (len--) {
5499                         if (!is_moving(A[0][x], A[1][x]) &&
5500                             (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5501                                 switch (hw[dw] & 4095) {
5502                                 case 0x30: /* RING_HEAD */
5503                                 case 0x34: /* RING_TAIL */
5504                                         break;
5505
5506                                 default:
5507                                         pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5508                                                engine->name, dw,
5509                                                hw[dw], hw[dw + 1],
5510                                                A[0][x], B[0][x], B[1][x],
5511                                                poison, lrc[dw + 1]);
5512                                         err = -EINVAL;
5513                                 }
5514                         }
5515                         dw += 2;
5516                         x++;
5517                 }
5518         } while (dw < PAGE_SIZE / sizeof(u32) &&
5519                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5520
5521         shmem_unpin_map(ce->engine->default_state, defaults);
5522 err_lrc:
5523         i915_gem_object_unpin_map(ce->state->obj);
5524 err_B1:
5525         i915_gem_object_unpin_map(result[1]->obj);
5526 err_B0:
5527         i915_gem_object_unpin_map(result[0]->obj);
5528 err_A1:
5529         i915_gem_object_unpin_map(ref[1]->obj);
5530 err_A0:
5531         i915_gem_object_unpin_map(ref[0]->obj);
5532         return err;
5533 }
5534
5535 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5536 {
5537         u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5538         struct i915_vma *ref[2], *result[2];
5539         struct intel_context *A, *B;
5540         struct i915_request *rq;
5541         int err;
5542
5543         A = intel_context_create(engine);
5544         if (IS_ERR(A))
5545                 return PTR_ERR(A);
5546
5547         B = intel_context_create(engine);
5548         if (IS_ERR(B)) {
5549                 err = PTR_ERR(B);
5550                 goto err_A;
5551         }
5552
5553         ref[0] = create_user_vma(A->vm, SZ_64K);
5554         if (IS_ERR(ref[0])) {
5555                 err = PTR_ERR(ref[0]);
5556                 goto err_B;
5557         }
5558
5559         ref[1] = create_user_vma(A->vm, SZ_64K);
5560         if (IS_ERR(ref[1])) {
5561                 err = PTR_ERR(ref[1]);
5562                 goto err_ref0;
5563         }
5564
5565         rq = record_registers(A, ref[0], ref[1], sema);
5566         if (IS_ERR(rq)) {
5567                 err = PTR_ERR(rq);
5568                 goto err_ref1;
5569         }
5570
5571         WRITE_ONCE(*sema, 1);
5572         wmb();
5573
5574         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5575                 i915_request_put(rq);
5576                 err = -ETIME;
5577                 goto err_ref1;
5578         }
5579         i915_request_put(rq);
5580
5581         result[0] = create_user_vma(A->vm, SZ_64K);
5582         if (IS_ERR(result[0])) {
5583                 err = PTR_ERR(result[0]);
5584                 goto err_ref1;
5585         }
5586
5587         result[1] = create_user_vma(A->vm, SZ_64K);
5588         if (IS_ERR(result[1])) {
5589                 err = PTR_ERR(result[1]);
5590                 goto err_result0;
5591         }
5592
5593         rq = record_registers(A, result[0], result[1], sema);
5594         if (IS_ERR(rq)) {
5595                 err = PTR_ERR(rq);
5596                 goto err_result1;
5597         }
5598
5599         err = poison_registers(B, poison, sema);
5600         if (err) {
5601                 WRITE_ONCE(*sema, -1);
5602                 i915_request_put(rq);
5603                 goto err_result1;
5604         }
5605
5606         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5607                 i915_request_put(rq);
5608                 err = -ETIME;
5609                 goto err_result1;
5610         }
5611         i915_request_put(rq);
5612
5613         err = compare_isolation(engine, ref, result, A, poison);
5614
5615 err_result1:
5616         i915_vma_put(result[1]);
5617 err_result0:
5618         i915_vma_put(result[0]);
5619 err_ref1:
5620         i915_vma_put(ref[1]);
5621 err_ref0:
5622         i915_vma_put(ref[0]);
5623 err_B:
5624         intel_context_put(B);
5625 err_A:
5626         intel_context_put(A);
5627         return err;
5628 }
5629
5630 static bool skip_isolation(const struct intel_engine_cs *engine)
5631 {
5632         if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5633                 return true;
5634
5635         if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5636                 return true;
5637
5638         return false;
5639 }
5640
5641 static int live_lrc_isolation(void *arg)
5642 {
5643         struct intel_gt *gt = arg;
5644         struct intel_engine_cs *engine;
5645         enum intel_engine_id id;
5646         const u32 poison[] = {
5647                 STACK_MAGIC,
5648                 0x3a3a3a3a,
5649                 0x5c5c5c5c,
5650                 0xffffffff,
5651                 0xffff0000,
5652         };
5653         int err = 0;
5654
5655         /*
5656          * Our goal is try and verify that per-context state cannot be
5657          * tampered with by another non-privileged client.
5658          *
5659          * We take the list of context registers from the LRI in the default
5660          * context image and attempt to modify that list from a remote context.
5661          */
5662
5663         for_each_engine(engine, gt, id) {
5664                 int i;
5665
5666                 /* Just don't even ask */
5667                 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5668                     skip_isolation(engine))
5669                         continue;
5670
5671                 intel_engine_pm_get(engine);
5672                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5673                         int result;
5674
5675                         result = __lrc_isolation(engine, poison[i]);
5676                         if (result && !err)
5677                                 err = result;
5678
5679                         result = __lrc_isolation(engine, ~poison[i]);
5680                         if (result && !err)
5681                                 err = result;
5682                 }
5683                 intel_engine_pm_put(engine);
5684                 if (igt_flush_test(gt->i915)) {
5685                         err = -EIO;
5686                         break;
5687                 }
5688         }
5689
5690         return err;
5691 }
5692
5693 static int indirect_ctx_submit_req(struct intel_context *ce)
5694 {
5695         struct i915_request *rq;
5696         int err = 0;
5697
5698         rq = intel_context_create_request(ce);
5699         if (IS_ERR(rq))
5700                 return PTR_ERR(rq);
5701
5702         i915_request_get(rq);
5703         i915_request_add(rq);
5704
5705         if (i915_request_wait(rq, 0, HZ / 5) < 0)
5706                 err = -ETIME;
5707
5708         i915_request_put(rq);
5709
5710         return err;
5711 }
5712
5713 #define CTX_BB_CANARY_OFFSET (3 * 1024)
5714 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
5715
5716 static u32 *
5717 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
5718 {
5719         *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
5720                 MI_SRM_LRM_GLOBAL_GTT |
5721                 MI_LRI_LRM_CS_MMIO;
5722         *cs++ = i915_mmio_reg_offset(RING_START(0));
5723         *cs++ = i915_ggtt_offset(ce->state) +
5724                 context_wa_bb_offset(ce) +
5725                 CTX_BB_CANARY_OFFSET;
5726         *cs++ = 0;
5727
5728         return cs;
5729 }
5730
5731 static void
5732 indirect_ctx_bb_setup(struct intel_context *ce)
5733 {
5734         u32 *cs = context_indirect_bb(ce);
5735
5736         cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
5737
5738         setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
5739 }
5740
5741 static bool check_ring_start(struct intel_context *ce)
5742 {
5743         const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
5744                 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
5745
5746         if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
5747                 return true;
5748
5749         pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
5750                ctx_bb[CTX_BB_CANARY_INDEX],
5751                ce->lrc_reg_state[CTX_RING_START]);
5752
5753         return false;
5754 }
5755
5756 static int indirect_ctx_bb_check(struct intel_context *ce)
5757 {
5758         int err;
5759
5760         err = indirect_ctx_submit_req(ce);
5761         if (err)
5762                 return err;
5763
5764         if (!check_ring_start(ce))
5765                 return -EINVAL;
5766
5767         return 0;
5768 }
5769
5770 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
5771 {
5772         struct intel_context *a, *b;
5773         int err;
5774
5775         a = intel_context_create(engine);
5776         if (IS_ERR(a))
5777                 return PTR_ERR(a);
5778         err = intel_context_pin(a);
5779         if (err)
5780                 goto put_a;
5781
5782         b = intel_context_create(engine);
5783         if (IS_ERR(b)) {
5784                 err = PTR_ERR(b);
5785                 goto unpin_a;
5786         }
5787         err = intel_context_pin(b);
5788         if (err)
5789                 goto put_b;
5790
5791         /* We use the already reserved extra page in context state */
5792         if (!a->wa_bb_page) {
5793                 GEM_BUG_ON(b->wa_bb_page);
5794                 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
5795                 goto unpin_b;
5796         }
5797
5798         /*
5799          * In order to test that our per context bb is truly per context,
5800          * and executes at the intended spot on context restoring process,
5801          * make the batch store the ring start value to memory.
5802          * As ring start is restored apriori of starting the indirect ctx bb and
5803          * as it will be different for each context, it fits to this purpose.
5804          */
5805         indirect_ctx_bb_setup(a);
5806         indirect_ctx_bb_setup(b);
5807
5808         err = indirect_ctx_bb_check(a);
5809         if (err)
5810                 goto unpin_b;
5811
5812         err = indirect_ctx_bb_check(b);
5813
5814 unpin_b:
5815         intel_context_unpin(b);
5816 put_b:
5817         intel_context_put(b);
5818 unpin_a:
5819         intel_context_unpin(a);
5820 put_a:
5821         intel_context_put(a);
5822
5823         return err;
5824 }
5825
5826 static int live_lrc_indirect_ctx_bb(void *arg)
5827 {
5828         struct intel_gt *gt = arg;
5829         struct intel_engine_cs *engine;
5830         enum intel_engine_id id;
5831         int err = 0;
5832
5833         for_each_engine(engine, gt, id) {
5834                 intel_engine_pm_get(engine);
5835                 err = __live_lrc_indirect_ctx_bb(engine);
5836                 intel_engine_pm_put(engine);
5837
5838                 if (igt_flush_test(gt->i915))
5839                         err = -EIO;
5840
5841                 if (err)
5842                         break;
5843         }
5844
5845         return err;
5846 }
5847
5848 static void garbage_reset(struct intel_engine_cs *engine,
5849                           struct i915_request *rq)
5850 {
5851         const unsigned int bit = I915_RESET_ENGINE + engine->id;
5852         unsigned long *lock = &engine->gt->reset.flags;
5853
5854         if (test_and_set_bit(bit, lock))
5855                 return;
5856
5857         tasklet_disable(&engine->execlists.tasklet);
5858
5859         if (!rq->fence.error)
5860                 intel_engine_reset(engine, NULL);
5861
5862         tasklet_enable(&engine->execlists.tasklet);
5863         clear_and_wake_up_bit(bit, lock);
5864 }
5865
5866 static struct i915_request *garbage(struct intel_context *ce,
5867                                     struct rnd_state *prng)
5868 {
5869         struct i915_request *rq;
5870         int err;
5871
5872         err = intel_context_pin(ce);
5873         if (err)
5874                 return ERR_PTR(err);
5875
5876         prandom_bytes_state(prng,
5877                             ce->lrc_reg_state,
5878                             ce->engine->context_size -
5879                             LRC_STATE_OFFSET);
5880
5881         rq = intel_context_create_request(ce);
5882         if (IS_ERR(rq)) {
5883                 err = PTR_ERR(rq);
5884                 goto err_unpin;
5885         }
5886
5887         i915_request_get(rq);
5888         i915_request_add(rq);
5889         return rq;
5890
5891 err_unpin:
5892         intel_context_unpin(ce);
5893         return ERR_PTR(err);
5894 }
5895
5896 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
5897 {
5898         struct intel_context *ce;
5899         struct i915_request *hang;
5900         int err = 0;
5901
5902         ce = intel_context_create(engine);
5903         if (IS_ERR(ce))
5904                 return PTR_ERR(ce);
5905
5906         hang = garbage(ce, prng);
5907         if (IS_ERR(hang)) {
5908                 err = PTR_ERR(hang);
5909                 goto err_ce;
5910         }
5911
5912         if (wait_for_submit(engine, hang, HZ / 2)) {
5913                 i915_request_put(hang);
5914                 err = -ETIME;
5915                 goto err_ce;
5916         }
5917
5918         intel_context_set_banned(ce);
5919         garbage_reset(engine, hang);
5920
5921         intel_engine_flush_submission(engine);
5922         if (!hang->fence.error) {
5923                 i915_request_put(hang);
5924                 pr_err("%s: corrupted context was not reset\n",
5925                        engine->name);
5926                 err = -EINVAL;
5927                 goto err_ce;
5928         }
5929
5930         if (i915_request_wait(hang, 0, HZ / 2) < 0) {
5931                 pr_err("%s: corrupted context did not recover\n",
5932                        engine->name);
5933                 i915_request_put(hang);
5934                 err = -EIO;
5935                 goto err_ce;
5936         }
5937         i915_request_put(hang);
5938
5939 err_ce:
5940         intel_context_put(ce);
5941         return err;
5942 }
5943
5944 static int live_lrc_garbage(void *arg)
5945 {
5946         struct intel_gt *gt = arg;
5947         struct intel_engine_cs *engine;
5948         enum intel_engine_id id;
5949
5950         /*
5951          * Verify that we can recover if one context state is completely
5952          * corrupted.
5953          */
5954
5955         if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
5956                 return 0;
5957
5958         for_each_engine(engine, gt, id) {
5959                 I915_RND_STATE(prng);
5960                 int err = 0, i;
5961
5962                 if (!intel_has_reset_engine(engine->gt))
5963                         continue;
5964
5965                 intel_engine_pm_get(engine);
5966                 for (i = 0; i < 3; i++) {
5967                         err = __lrc_garbage(engine, &prng);
5968                         if (err)
5969                                 break;
5970                 }
5971                 intel_engine_pm_put(engine);
5972
5973                 if (igt_flush_test(gt->i915))
5974                         err = -EIO;
5975                 if (err)
5976                         return err;
5977         }
5978
5979         return 0;
5980 }
5981
5982 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
5983 {
5984         struct intel_context *ce;
5985         struct i915_request *rq;
5986         IGT_TIMEOUT(end_time);
5987         int err;
5988
5989         ce = intel_context_create(engine);
5990         if (IS_ERR(ce))
5991                 return PTR_ERR(ce);
5992
5993         ce->runtime.num_underflow = 0;
5994         ce->runtime.max_underflow = 0;
5995
5996         do {
5997                 unsigned int loop = 1024;
5998
5999                 while (loop) {
6000                         rq = intel_context_create_request(ce);
6001                         if (IS_ERR(rq)) {
6002                                 err = PTR_ERR(rq);
6003                                 goto err_rq;
6004                         }
6005
6006                         if (--loop == 0)
6007                                 i915_request_get(rq);
6008
6009                         i915_request_add(rq);
6010                 }
6011
6012                 if (__igt_timeout(end_time, NULL))
6013                         break;
6014
6015                 i915_request_put(rq);
6016         } while (1);
6017
6018         err = i915_request_wait(rq, 0, HZ / 5);
6019         if (err < 0) {
6020                 pr_err("%s: request not completed!\n", engine->name);
6021                 goto err_wait;
6022         }
6023
6024         igt_flush_test(engine->i915);
6025
6026         pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6027                 engine->name,
6028                 intel_context_get_total_runtime_ns(ce),
6029                 intel_context_get_avg_runtime_ns(ce));
6030
6031         err = 0;
6032         if (ce->runtime.num_underflow) {
6033                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6034                        engine->name,
6035                        ce->runtime.num_underflow,
6036                        ce->runtime.max_underflow);
6037                 GEM_TRACE_DUMP();
6038                 err = -EOVERFLOW;
6039         }
6040
6041 err_wait:
6042         i915_request_put(rq);
6043 err_rq:
6044         intel_context_put(ce);
6045         return err;
6046 }
6047
6048 static int live_pphwsp_runtime(void *arg)
6049 {
6050         struct intel_gt *gt = arg;
6051         struct intel_engine_cs *engine;
6052         enum intel_engine_id id;
6053         int err = 0;
6054
6055         /*
6056          * Check that cumulative context runtime as stored in the pphwsp[16]
6057          * is monotonic.
6058          */
6059
6060         for_each_engine(engine, gt, id) {
6061                 err = __live_pphwsp_runtime(engine);
6062                 if (err)
6063                         break;
6064         }
6065
6066         if (igt_flush_test(gt->i915))
6067                 err = -EIO;
6068
6069         return err;
6070 }
6071
6072 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6073 {
6074         static const struct i915_subtest tests[] = {
6075                 SUBTEST(live_lrc_layout),
6076                 SUBTEST(live_lrc_fixed),
6077                 SUBTEST(live_lrc_state),
6078                 SUBTEST(live_lrc_gpr),
6079                 SUBTEST(live_lrc_isolation),
6080                 SUBTEST(live_lrc_timestamp),
6081                 SUBTEST(live_lrc_garbage),
6082                 SUBTEST(live_pphwsp_runtime),
6083                 SUBTEST(live_lrc_indirect_ctx_bb),
6084         };
6085
6086         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6087                 return 0;
6088
6089         return intel_gt_live_subtests(tests, &i915->gt);
6090 }