1 // SPDX-License-Identifier: MIT
3 * Copyright © 2018 Intel Corporation
6 #include <linux/prime_numbers.h>
8 #include "gem/i915_gem_internal.h"
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
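/*
 * Each engine's command streamer exposes 16 general purpose registers
 * (GPRs) starting at mmio_base + 0x600; every GPR is 64 bits wide, so
 * CS_GPR() below addresses them by dword and NUM_GPR_DW counts dwords.
 */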
26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
27 #define NUM_GPR 16
28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30 static struct i915_vma *create_scratch(struct intel_gt *gt)
32 return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
35 static bool is_active(struct i915_request *rq)
37 if (i915_request_is_active(rq))
40 if (i915_request_on_hold(rq))
43 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
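/*
 * Wait for the HW to pick up @rq: kick the submission tasklet, flush any
 * queued submission work, and succeed once execlists.pending[] is empty
 * while the request is active on the engine (or has already completed),
 * giving up when @timeout expires.
 */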
49 static int wait_for_submit(struct intel_engine_cs *engine,
50 struct i915_request *rq,
51 unsigned long timeout)
53 /* Ignore our own attempts to suppress excess tasklets */
54 tasklet_hi_schedule(&engine->sched_engine->tasklet);
58 bool done = time_after(jiffies, timeout);
60 if (i915_request_completed(rq)) /* that was quick! */
63 /* Wait until the HW has acknowledged the submission (or err) */
64 intel_engine_flush_submission(engine);
65 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
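/*
 * emit_semaphore_signal() pokes a dword into the engine's status page at
 * @slot with MI_STORE_DWORD_IMM and submits it at I915_PRIORITY_BARRIER,
 * so the write is scheduled ahead of other work and releases any
 * MI_SEMAPHORE_WAIT poller parked on that slot.
 */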
75 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
78 i915_ggtt_offset(ce->engine->status_page.vma) +
80 struct i915_request *rq;
83 rq = intel_context_create_request(ce);
87 cs = intel_ring_begin(rq, 4);
93 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
98 intel_ring_advance(rq, cs);
100 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
101 i915_request_add(rq);
105 static int context_flush(struct intel_context *ce, long timeout)
107 struct i915_request *rq;
108 struct dma_fence *fence;
111 rq = intel_engine_create_kernel_request(ce->engine);
115 fence = i915_active_fence_get(&ce->timeline->last_request);
117 i915_request_await_dma_fence(rq, fence);
118 dma_fence_put(fence);
121 rq = i915_request_get(rq);
122 i915_request_add(rq);
123 if (i915_request_wait(rq, 0, timeout) < 0)
125 i915_request_put(rq);
127 rmb(); /* We know the request is written, make sure all state is too! */
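/*
 * get_lri_mask() returns the register-offset bits the HW compares for a
 * MI_LOAD_REGISTER_IMM: every bit when MI_LRI_LRM_CS_MMIO is clear,
 * otherwise only the low offset bits, the exact width depending on
 * graphics version and engine class (see the table in live_lrc_layout()).
 */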
131 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
133 if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
136 if (GRAPHICS_VER(engine->i915) < 12)
139 switch (engine->class) {
144 case COPY_ENGINE_CLASS:
146 case VIDEO_DECODE_CLASS:
147 case VIDEO_ENHANCEMENT_CLASS:
152 static int live_lrc_layout(void *arg)
154 struct intel_gt *gt = arg;
155 struct intel_engine_cs *engine;
156 enum intel_engine_id id;
161 * Check the register offsets we use to create the initial reg state
162 * match the layout saved by HW.
165 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
168 GEM_BUG_ON(offset_in_page(lrc));
171 for_each_engine(engine, gt, id) {
175 if (!engine->default_state)
178 hw = shmem_pin_map(engine->default_state);
183 hw += LRC_STATE_OFFSET / sizeof(*hw);
185 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
186 engine->kernel_context, engine, true);
190 u32 lri = READ_ONCE(hw[dw]);
199 pr_debug("%s: skipped instruction %x at dword %d\n",
200 engine->name, lri, dw);
205 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
206 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
207 engine->name, dw, lri);
212 if (lrc[dw] != lri) {
213 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
214 engine->name, dw, lri, lrc[dw]);
220 * When bit 19 of the MI_LOAD_REGISTER_IMM instruction
221 * opcode is set on Gen12+ devices, HW does not
222 * care about certain register address offsets, and
223 * instead checks the following for valid address
224 * ranges on specific engines:
225 * RCS && CCS: BITS(0 - 10)
226 * BCS: BITS(0 - 11)
227 * VECS && VCS: BITS(0 - 13)
229 lri_mask = get_lri_mask(engine, lri);
236 u32 offset = READ_ONCE(hw[dw]);
238 if ((offset ^ lrc[dw]) & lri_mask) {
239 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
240 engine->name, dw, offset, lrc[dw]);
246 * Skip over the actual register value as we
247 * expect that to differ.
252 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
255 pr_info("%s: HW register image:\n", engine->name);
256 igt_hexdump(hw, PAGE_SIZE);
258 pr_info("%s: SW register image:\n", engine->name);
259 igt_hexdump(lrc, PAGE_SIZE);
262 shmem_unpin_map(engine->default_state, hw);
267 free_page((unsigned long)lrc);
271 static int find_offset(const u32 *lri, u32 offset)
275 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
276 if (lri[i] == offset)
282 static int live_lrc_fixed(void *arg)
284 struct intel_gt *gt = arg;
285 struct intel_engine_cs *engine;
286 enum intel_engine_id id;
290 * Check the assumed register offsets match the actual locations in
291 * the context image.
294 for_each_engine(engine, gt, id) {
301 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
306 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
311 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
316 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
321 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
322 lrc_ring_mi_mode(engine),
326 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
331 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
332 lrc_ring_wa_bb_per_ctx(engine),
333 "RING_BB_PER_CTX_PTR"
336 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
337 lrc_ring_indirect_ptr(engine),
338 "RING_INDIRECT_CTX_PTR"
341 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
342 lrc_ring_indirect_offset(engine),
343 "RING_INDIRECT_CTX_OFFSET"
346 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
351 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
352 lrc_ring_gpr0(engine),
356 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
357 lrc_ring_cmd_buf_cctl(engine),
364 if (!engine->default_state)
367 hw = shmem_pin_map(engine->default_state);
372 hw += LRC_STATE_OFFSET / sizeof(*hw);
374 for (t = tbl; t->name; t++) {
375 int dw = find_offset(hw, t->reg);
377 if (dw != t->offset) {
378 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
388 shmem_unpin_map(engine->default_state, hw);
394 static int __live_lrc_state(struct intel_engine_cs *engine,
395 struct i915_vma *scratch)
397 struct intel_context *ce;
398 struct i915_request *rq;
399 struct i915_gem_ww_ctx ww;
405 u32 expected[MAX_IDX];
410 ce = intel_context_create(engine);
414 i915_gem_ww_ctx_init(&ww, false);
416 err = i915_gem_object_lock(scratch->obj, &ww);
418 err = intel_context_pin_ww(ce, &ww);
422 rq = i915_request_create(ce);
428 cs = intel_ring_begin(rq, 4 * MAX_IDX);
431 i915_request_add(rq);
435 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
436 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
437 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
440 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
442 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
443 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
444 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
447 err = i915_request_await_object(rq, scratch->obj, true);
449 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
451 i915_request_get(rq);
452 i915_request_add(rq);
456 intel_engine_flush_submission(engine);
457 expected[RING_TAIL_IDX] = ce->ring->tail;
459 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
464 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
470 for (n = 0; n < MAX_IDX; n++) {
471 if (cs[n] != expected[n]) {
472 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
473 engine->name, n, cs[n], expected[n]);
479 i915_gem_object_unpin_map(scratch->obj);
482 i915_request_put(rq);
484 intel_context_unpin(ce);
486 if (err == -EDEADLK) {
487 err = i915_gem_ww_ctx_backoff(&ww);
491 i915_gem_ww_ctx_fini(&ww);
492 intel_context_put(ce);
496 static int live_lrc_state(void *arg)
498 struct intel_gt *gt = arg;
499 struct intel_engine_cs *engine;
500 struct i915_vma *scratch;
501 enum intel_engine_id id;
505 * Check the live register state matches what we expect for this
506 * intel_context.
509 scratch = create_scratch(gt);
511 return PTR_ERR(scratch);
513 for_each_engine(engine, gt, id) {
514 err = __live_lrc_state(engine, scratch);
519 if (igt_flush_test(gt->i915))
522 i915_vma_unpin_and_release(&scratch, 0);
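/*
 * gpr_make_dirty() fills every CS_GPR dword on @ce with a non-zero value
 * using a single MI_LOAD_REGISTER_IMM at barrier priority, so that a
 * context created afterwards can verify its GPRs read back as zero
 * rather than inheriting stale values.
 */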
526 static int gpr_make_dirty(struct intel_context *ce)
528 struct i915_request *rq;
532 rq = intel_context_create_request(ce);
536 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
538 i915_request_add(rq);
542 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
543 for (n = 0; n < NUM_GPR_DW; n++) {
544 *cs++ = CS_GPR(ce->engine, n);
549 intel_ring_advance(rq, cs);
551 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
552 i915_request_add(rq);
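/*
 * __gpr_read() builds a request on @ce that spins on a semaphore in the
 * status page at @slot and, once released, copies every CS_GPR dword
 * into @scratch with MI_STORE_REGISTER_MEM; parking the request on the
 * semaphore lets the caller force a context save/restore (and dirty the
 * GPRs from another context) before the values are sampled.
 */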
557 static struct i915_request *
558 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
561 i915_ggtt_offset(ce->engine->status_page.vma) +
562 offset_in_page(slot);
563 struct i915_request *rq;
568 rq = intel_context_create_request(ce);
572 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
574 i915_request_add(rq);
578 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
581 *cs++ = MI_SEMAPHORE_WAIT |
582 MI_SEMAPHORE_GLOBAL_GTT |
584 MI_SEMAPHORE_SAD_NEQ_SDD;
589 for (n = 0; n < NUM_GPR_DW; n++) {
590 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
591 *cs++ = CS_GPR(ce->engine, n);
592 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
596 i915_vma_lock(scratch);
597 err = i915_request_await_object(rq, scratch->obj, true);
599 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
600 i915_vma_unlock(scratch);
602 i915_request_get(rq);
603 i915_request_add(rq);
605 i915_request_put(rq);
612 static int __live_lrc_gpr(struct intel_engine_cs *engine,
613 struct i915_vma *scratch,
616 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
617 struct intel_context *ce;
618 struct i915_request *rq;
623 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
624 return 0; /* GPR only on rcs0 for gen8 */
626 err = gpr_make_dirty(engine->kernel_context);
630 ce = intel_context_create(engine);
634 rq = __gpr_read(ce, scratch, slot);
640 err = wait_for_submit(engine, rq, HZ / 2);
645 err = gpr_make_dirty(engine->kernel_context);
649 err = emit_semaphore_signal(engine->kernel_context, slot);
653 err = wait_for_submit(engine, rq, HZ / 2);
661 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
666 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
672 for (n = 0; n < NUM_GPR_DW; n++) {
674 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
676 n / 2, n & 1 ? "udw" : "ldw",
683 i915_gem_object_unpin_map(scratch->obj);
686 memset32(&slot[0], -1, 4);
688 i915_request_put(rq);
690 intel_context_put(ce);
694 static int live_lrc_gpr(void *arg)
696 struct intel_gt *gt = arg;
697 struct intel_engine_cs *engine;
698 struct i915_vma *scratch;
699 enum intel_engine_id id;
703 * Check that GPR registers are cleared in new contexts as we need
704 * to avoid leaking any information from previous contexts.
707 scratch = create_scratch(gt);
709 return PTR_ERR(scratch);
711 for_each_engine(engine, gt, id) {
712 st_engine_heartbeat_disable(engine);
714 err = __live_lrc_gpr(engine, scratch, false);
718 err = __live_lrc_gpr(engine, scratch, true);
723 st_engine_heartbeat_enable(engine);
724 if (igt_flush_test(gt->i915))
730 i915_vma_unpin_and_release(&scratch, 0);
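/*
 * create_timestamp() builds a request on @ce that waits on a status-page
 * semaphore and, once released, stores RING_CTX_TIMESTAMP into slot[idx],
 * so the caller can compare the running timestamp against the value it
 * poisoned into the saved context image.
 */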
734 static struct i915_request *
735 create_timestamp(struct intel_context *ce, void *slot, int idx)
738 i915_ggtt_offset(ce->engine->status_page.vma) +
739 offset_in_page(slot);
740 struct i915_request *rq;
744 rq = intel_context_create_request(ce);
748 cs = intel_ring_begin(rq, 10);
754 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
757 *cs++ = MI_SEMAPHORE_WAIT |
758 MI_SEMAPHORE_GLOBAL_GTT |
760 MI_SEMAPHORE_SAD_NEQ_SDD;
765 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
766 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
767 *cs++ = offset + idx * sizeof(u32);
770 intel_ring_advance(rq, cs);
774 i915_request_get(rq);
775 i915_request_add(rq);
777 i915_request_put(rq);
784 struct lrc_timestamp {
785 struct intel_engine_cs *engine;
786 struct intel_context *ce[2];
790 static bool timestamp_advanced(u32 start, u32 end)
792 return (s32)(end - start) > 0;
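/*
 * e.g. timestamp_advanced(0xfffffff0, 0x00000010) is true: the u32
 * subtraction wraps to 0x20, which is still positive as an s32.
 */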
795 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
797 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
798 struct i915_request *rq;
802 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
803 rq = create_timestamp(arg->ce[0], slot, 1);
807 err = wait_for_submit(rq->engine, rq, HZ / 2);
812 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
813 err = emit_semaphore_signal(arg->ce[1], slot);
821 /* And wait for switch to kernel (to save our context to memory) */
822 err = context_flush(arg->ce[0], HZ / 2);
826 if (!timestamp_advanced(arg->poison, slot[1])) {
827 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
828 arg->engine->name, preempt ? "preempt" : "simple",
829 arg->poison, slot[1]);
833 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
834 if (!timestamp_advanced(slot[1], timestamp)) {
835 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
836 arg->engine->name, preempt ? "preempt" : "simple",
842 memset32(slot, -1, 4);
843 i915_request_put(rq);
847 static int live_lrc_timestamp(void *arg)
849 struct lrc_timestamp data = {};
850 struct intel_gt *gt = arg;
851 enum intel_engine_id id;
852 const u32 poison[] = {
860 * We want to verify that the timestamp is saved and restored across
861 * context switches and is monotonic.
863 * So we do this with a little bit of LRC poisoning to check various
864 * boundary conditions, and see what happens if we preempt the context
865 * with a second request (carrying more poison into the timestamp).
868 for_each_engine(data.engine, gt, id) {
871 st_engine_heartbeat_disable(data.engine);
873 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
874 struct intel_context *tmp;
876 tmp = intel_context_create(data.engine);
882 err = intel_context_pin(tmp);
884 intel_context_put(tmp);
891 for (i = 0; i < ARRAY_SIZE(poison); i++) {
892 data.poison = poison[i];
894 err = __lrc_timestamp(&data, false);
898 err = __lrc_timestamp(&data, true);
904 st_engine_heartbeat_enable(data.engine);
905 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
909 intel_context_unpin(data.ce[i]);
910 intel_context_put(data.ce[i]);
913 if (igt_flush_test(gt->i915))
922 static struct i915_vma *
923 create_user_vma(struct i915_address_space *vm, unsigned long size)
925 struct drm_i915_gem_object *obj;
926 struct i915_vma *vma;
929 obj = i915_gem_object_create_internal(vm->i915, size);
931 return ERR_CAST(obj);
933 vma = i915_vma_instance(obj, vm, NULL);
935 i915_gem_object_put(obj);
939 err = i915_vma_pin(vma, 0, 0, PIN_USER);
941 i915_gem_object_put(obj);
948 static u32 safe_poison(u32 offset, u32 poison)
951 * Do not enable predication as it will nop all subsequent commands,
952 * not only disabling the tests (by preventing all the other SRM) but
953 * also preventing the arbitration events at the end of the request.
955 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
956 poison &= ~REG_BIT(0);
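/*
 * store_context() builds a user batch that walks the MI_LOAD_REGISTER_IMM
 * lists in the engine's default context image and emits an SRM for every
 * register it finds, dumping the live value of each context register
 * into @scratch.
 */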
961 static struct i915_vma *
962 store_context(struct intel_context *ce, struct i915_vma *scratch)
964 struct i915_vma *batch;
968 batch = create_user_vma(ce->vm, SZ_64K);
972 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
978 defaults = shmem_pin_map(ce->engine->default_state);
980 i915_gem_object_unpin_map(batch->obj);
982 return ERR_PTR(-ENOMEM);
988 hw += LRC_STATE_OFFSET / sizeof(*hw);
990 u32 len = hw[dw] & 0x7f;
997 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
1003 len = (len + 1) / 2;
1005 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
1007 *cs++ = lower_32_bits(scratch->node.start + x);
1008 *cs++ = upper_32_bits(scratch->node.start + x);
1013 } while (dw < PAGE_SIZE / sizeof(u32) &&
1014 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1016 *cs++ = MI_BATCH_BUFFER_END;
1018 shmem_unpin_map(ce->engine->default_state, defaults);
1020 i915_gem_object_flush_map(batch->obj);
1021 i915_gem_object_unpin_map(batch->obj);
1026 static int move_to_active(struct i915_request *rq,
1027 struct i915_vma *vma,
1033 err = i915_request_await_object(rq, vma->obj, flags);
1035 err = i915_vma_move_to_active(vma, rq, flags);
1036 i915_vma_unlock(vma);
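/*
 * record_registers() submits a request on @ce that dumps the context
 * registers into @before, parks on a status-page semaphore, and then
 * dumps the same registers into @after once the semaphore is released
 * (either by poison_registers() or directly from the CPU).
 */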
1041 static struct i915_request *
1042 record_registers(struct intel_context *ce,
1043 struct i915_vma *before,
1044 struct i915_vma *after,
1047 struct i915_vma *b_before, *b_after;
1048 struct i915_request *rq;
1052 b_before = store_context(ce, before);
1053 if (IS_ERR(b_before))
1054 return ERR_CAST(b_before);
1056 b_after = store_context(ce, after);
1057 if (IS_ERR(b_after)) {
1058 rq = ERR_CAST(b_after);
1062 rq = intel_context_create_request(ce);
1066 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
1070 err = move_to_active(rq, b_before, 0);
1074 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
1078 err = move_to_active(rq, b_after, 0);
1082 cs = intel_ring_begin(rq, 14);
1088 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1089 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1090 *cs++ = lower_32_bits(b_before->node.start);
1091 *cs++ = upper_32_bits(b_before->node.start);
1093 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1094 *cs++ = MI_SEMAPHORE_WAIT |
1095 MI_SEMAPHORE_GLOBAL_GTT |
1097 MI_SEMAPHORE_SAD_NEQ_SDD;
1099 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1100 offset_in_page(sema);
1104 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1105 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1106 *cs++ = lower_32_bits(b_after->node.start);
1107 *cs++ = upper_32_bits(b_after->node.start);
1109 intel_ring_advance(rq, cs);
1111 WRITE_ONCE(*sema, 0);
1112 i915_request_get(rq);
1113 i915_request_add(rq);
1115 i915_vma_put(b_after);
1117 i915_vma_put(b_before);
1121 i915_request_add(rq);
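/*
 * load_context() builds a user batch that replays the MI_LOAD_REGISTER_IMM
 * lists from the default context image, substituting @poison (filtered
 * through safe_poison()) for every register value, i.e. an unprivileged
 * attempt to clobber the context registers.
 */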
1126 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1128 struct i915_vma *batch;
1132 batch = create_user_vma(ce->vm, SZ_64K);
1136 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1138 i915_vma_put(batch);
1139 return ERR_CAST(cs);
1142 defaults = shmem_pin_map(ce->engine->default_state);
1144 i915_gem_object_unpin_map(batch->obj);
1145 i915_vma_put(batch);
1146 return ERR_PTR(-ENOMEM);
1151 hw += LRC_STATE_OFFSET / sizeof(*hw);
1153 u32 len = hw[dw] & 0x7f;
1160 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
1166 len = (len + 1) / 2;
1167 *cs++ = MI_LOAD_REGISTER_IMM(len);
1170 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1171 MI_LRI_LRM_CS_MMIO),
1175 } while (dw < PAGE_SIZE / sizeof(u32) &&
1176 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1178 *cs++ = MI_BATCH_BUFFER_END;
1180 shmem_unpin_map(ce->engine->default_state, defaults);
1182 i915_gem_object_flush_map(batch->obj);
1183 i915_gem_object_unpin_map(batch->obj);
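/*
 * poison_registers() runs the batch from load_context() on @ce with
 * arbitration disabled and then writes to the status-page semaphore, so
 * the victim's record_registers() request resumes and captures whatever
 * state survived the poisoning.
 */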
1188 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1190 struct i915_request *rq;
1191 struct i915_vma *batch;
1195 batch = load_context(ce, poison);
1197 return PTR_ERR(batch);
1199 rq = intel_context_create_request(ce);
1205 err = move_to_active(rq, batch, 0);
1209 cs = intel_ring_begin(rq, 8);
1215 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1216 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1217 *cs++ = lower_32_bits(batch->node.start);
1218 *cs++ = upper_32_bits(batch->node.start);
1220 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1221 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1222 offset_in_page(sema);
1226 intel_ring_advance(rq, cs);
1228 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1230 i915_request_add(rq);
1232 i915_vma_put(batch);
1236 static bool is_moving(u32 a, u32 b)
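/*
 * compare_isolation() walks the register dumps taken before (@ref) and
 * after (@result) the poisoning attempt: any register that was stable in
 * the reference pair but differs in the result pair was modified by the
 * foreign context and is reported as an isolation failure (RING_HEAD and
 * RING_TAIL are expected to move and are skipped).
 */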
1241 static int compare_isolation(struct intel_engine_cs *engine,
1242 struct i915_vma *ref[2],
1243 struct i915_vma *result[2],
1244 struct intel_context *ce,
1247 u32 x, dw, *hw, *lrc;
1252 A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1254 return PTR_ERR(A[0]);
1256 A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1258 err = PTR_ERR(A[1]);
1262 B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1264 err = PTR_ERR(B[0]);
1268 B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1270 err = PTR_ERR(B[1]);
1274 lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1275 i915_coherent_map_type(engine->i915,
1282 lrc += LRC_STATE_OFFSET / sizeof(*hw);
1284 defaults = shmem_pin_map(ce->engine->default_state);
1293 hw += LRC_STATE_OFFSET / sizeof(*hw);
1295 u32 len = hw[dw] & 0x7f;
1302 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
1308 len = (len + 1) / 2;
1310 if (!is_moving(A[0][x], A[1][x]) &&
1311 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1312 switch (hw[dw] & 4095) {
1313 case 0x30: /* RING_HEAD */
1314 case 0x34: /* RING_TAIL */
1318 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1321 A[0][x], B[0][x], B[1][x],
1322 poison, lrc[dw + 1]);
1329 } while (dw < PAGE_SIZE / sizeof(u32) &&
1330 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1332 shmem_unpin_map(ce->engine->default_state, defaults);
1334 i915_gem_object_unpin_map(ce->state->obj);
1336 i915_gem_object_unpin_map(result[1]->obj);
1338 i915_gem_object_unpin_map(result[0]->obj);
1340 i915_gem_object_unpin_map(ref[1]->obj);
1342 i915_gem_object_unpin_map(ref[0]->obj);
1346 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1348 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1349 struct i915_vma *ref[2], *result[2];
1350 struct intel_context *A, *B;
1351 struct i915_request *rq;
1354 A = intel_context_create(engine);
1358 B = intel_context_create(engine);
1364 ref[0] = create_user_vma(A->vm, SZ_64K);
1365 if (IS_ERR(ref[0])) {
1366 err = PTR_ERR(ref[0]);
1370 ref[1] = create_user_vma(A->vm, SZ_64K);
1371 if (IS_ERR(ref[1])) {
1372 err = PTR_ERR(ref[1]);
1376 rq = record_registers(A, ref[0], ref[1], sema);
1382 WRITE_ONCE(*sema, 1);
1385 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1386 i915_request_put(rq);
1390 i915_request_put(rq);
1392 result[0] = create_user_vma(A->vm, SZ_64K);
1393 if (IS_ERR(result[0])) {
1394 err = PTR_ERR(result[0]);
1398 result[1] = create_user_vma(A->vm, SZ_64K);
1399 if (IS_ERR(result[1])) {
1400 err = PTR_ERR(result[1]);
1404 rq = record_registers(A, result[0], result[1], sema);
1410 err = poison_registers(B, poison, sema);
1412 WRITE_ONCE(*sema, -1);
1413 i915_request_put(rq);
1417 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1418 i915_request_put(rq);
1422 i915_request_put(rq);
1424 err = compare_isolation(engine, ref, result, A, poison);
1427 i915_vma_put(result[1]);
1429 i915_vma_put(result[0]);
1431 i915_vma_put(ref[1]);
1433 i915_vma_put(ref[0]);
1435 intel_context_put(B);
1437 intel_context_put(A);
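/*
 * skip_isolation() lists the engine/platform combinations (gen9 blitter,
 * gen11 render) on which the isolation check is only attempted when
 * CONFIG_DRM_I915_SELFTEST_BROKEN is enabled; see the "Just don't even
 * ask" guard in live_lrc_isolation().
 */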
1441 static bool skip_isolation(const struct intel_engine_cs *engine)
1443 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1446 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1452 static int live_lrc_isolation(void *arg)
1454 struct intel_gt *gt = arg;
1455 struct intel_engine_cs *engine;
1456 enum intel_engine_id id;
1457 const u32 poison[] = {
1467 * Our goal is to verify that per-context state cannot be
1468 * tampered with by another non-privileged client.
1470 * We take the list of context registers from the LRI in the default
1471 * context image and attempt to modify that list from a remote context.
1474 for_each_engine(engine, gt, id) {
1477 /* Just don't even ask */
1478 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1479 skip_isolation(engine))
1482 intel_engine_pm_get(engine);
1483 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1486 result = __lrc_isolation(engine, poison[i]);
1490 result = __lrc_isolation(engine, ~poison[i]);
1494 intel_engine_pm_put(engine);
1495 if (igt_flush_test(gt->i915)) {
1504 static int indirect_ctx_submit_req(struct intel_context *ce)
1506 struct i915_request *rq;
1509 rq = intel_context_create_request(ce);
1513 i915_request_get(rq);
1514 i915_request_add(rq);
1516 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1519 i915_request_put(rq);
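/*
 * The indirect (per-context) batch buffer is pointed at a canary: it SRMs
 * RING_START into a slot inside the context image's wa_bb page. After a
 * request has run, that canary must match CTX_RING_START in the saved
 * register state, proving the indirect bb executed for this context.
 */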
1524 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1525 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1528 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1530 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1531 MI_SRM_LRM_GLOBAL_GTT |
1533 *cs++ = i915_mmio_reg_offset(RING_START(0));
1534 *cs++ = i915_ggtt_offset(ce->state) +
1535 context_wa_bb_offset(ce) +
1536 CTX_BB_CANARY_OFFSET;
1543 indirect_ctx_bb_setup(struct intel_context *ce)
1545 u32 *cs = context_indirect_bb(ce);
1547 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1549 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1552 static bool check_ring_start(struct intel_context *ce)
1554 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1555 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
1557 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1560 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1561 ctx_bb[CTX_BB_CANARY_INDEX],
1562 ce->lrc_reg_state[CTX_RING_START]);
1567 static int indirect_ctx_bb_check(struct intel_context *ce)
1571 err = indirect_ctx_submit_req(ce);
1575 if (!check_ring_start(ce))
1581 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
1583 struct intel_context *a, *b;
1586 a = intel_context_create(engine);
1589 err = intel_context_pin(a);
1593 b = intel_context_create(engine);
1598 err = intel_context_pin(b);
1602 /* We use the already reserved extra page in context state */
1603 if (!a->wa_bb_page) {
1604 GEM_BUG_ON(b->wa_bb_page);
1605 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1610 * In order to test that our per-context bb is truly per context, and
1611 * executes at the intended spot in the context restore process, make
1612 * the batch store the ring start value to memory.
1613 * As ring start is restored prior to starting the indirect ctx bb, and
1614 * as it will be different for each context, it fits this purpose well.
1616 indirect_ctx_bb_setup(a);
1617 indirect_ctx_bb_setup(b);
1619 err = indirect_ctx_bb_check(a);
1623 err = indirect_ctx_bb_check(b);
1626 intel_context_unpin(b);
1628 intel_context_put(b);
1630 intel_context_unpin(a);
1632 intel_context_put(a);
1637 static int live_lrc_indirect_ctx_bb(void *arg)
1639 struct intel_gt *gt = arg;
1640 struct intel_engine_cs *engine;
1641 enum intel_engine_id id;
1644 for_each_engine(engine, gt, id) {
1645 intel_engine_pm_get(engine);
1646 err = __live_lrc_indirect_ctx_bb(engine);
1647 intel_engine_pm_put(engine);
1649 if (igt_flush_test(gt->i915))
1659 static void garbage_reset(struct intel_engine_cs *engine,
1660 struct i915_request *rq)
1662 const unsigned int bit = I915_RESET_ENGINE + engine->id;
1663 unsigned long *lock = &engine->gt->reset.flags;
1666 if (!test_and_set_bit(bit, lock)) {
1667 tasklet_disable(&engine->sched_engine->tasklet);
1669 if (!rq->fence.error)
1670 __intel_engine_reset_bh(engine, NULL);
1672 tasklet_enable(&engine->sched_engine->tasklet);
1673 clear_and_wake_up_bit(bit, lock);
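/*
 * garbage() scribbles random bytes over the pinned context state and
 * submits a request on it, simulating a context image that has been
 * completely corrupted.
 */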
1678 static struct i915_request *garbage(struct intel_context *ce,
1679 struct rnd_state *prng)
1681 struct i915_request *rq;
1684 err = intel_context_pin(ce);
1686 return ERR_PTR(err);
1688 prandom_bytes_state(prng,
1690 ce->engine->context_size -
1693 rq = intel_context_create_request(ce);
1699 i915_request_get(rq);
1700 i915_request_add(rq);
1704 intel_context_unpin(ce);
1705 return ERR_PTR(err);
1708 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1710 struct intel_context *ce;
1711 struct i915_request *hang;
1714 ce = intel_context_create(engine);
1718 hang = garbage(ce, prng);
1720 err = PTR_ERR(hang);
1724 if (wait_for_submit(engine, hang, HZ / 2)) {
1725 i915_request_put(hang);
1730 intel_context_set_banned(ce);
1731 garbage_reset(engine, hang);
1733 intel_engine_flush_submission(engine);
1734 if (!hang->fence.error) {
1735 i915_request_put(hang);
1736 pr_err("%s: corrupted context was not reset\n",
1742 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1743 pr_err("%s: corrupted context did not recover\n",
1745 i915_request_put(hang);
1749 i915_request_put(hang);
1752 intel_context_put(ce);
1756 static int live_lrc_garbage(void *arg)
1758 struct intel_gt *gt = arg;
1759 struct intel_engine_cs *engine;
1760 enum intel_engine_id id;
1763 * Verify that we can recover if one context state is completely
1764 * corrupted.
1767 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1770 for_each_engine(engine, gt, id) {
1771 I915_RND_STATE(prng);
1774 if (!intel_has_reset_engine(engine->gt))
1777 intel_engine_pm_get(engine);
1778 for (i = 0; i < 3; i++) {
1779 err = __lrc_garbage(engine, &prng);
1783 intel_engine_pm_put(engine);
1785 if (igt_flush_test(gt->i915))
1794 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1796 struct intel_context *ce;
1797 struct i915_request *rq;
1798 IGT_TIMEOUT(end_time);
1801 ce = intel_context_create(engine);
1805 ce->stats.runtime.num_underflow = 0;
1806 ce->stats.runtime.max_underflow = 0;
1809 unsigned int loop = 1024;
1812 rq = intel_context_create_request(ce);
1819 i915_request_get(rq);
1821 i915_request_add(rq);
1824 if (__igt_timeout(end_time, NULL))
1827 i915_request_put(rq);
1830 err = i915_request_wait(rq, 0, HZ / 5);
1832 pr_err("%s: request not completed!\n", engine->name);
1836 igt_flush_test(engine->i915);
1838 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1840 intel_context_get_total_runtime_ns(ce),
1841 intel_context_get_avg_runtime_ns(ce));
1844 if (ce->stats.runtime.num_underflow) {
1845 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1847 ce->stats.runtime.num_underflow,
1848 ce->stats.runtime.max_underflow);
1854 i915_request_put(rq);
1856 intel_context_put(ce);
1860 static int live_pphwsp_runtime(void *arg)
1862 struct intel_gt *gt = arg;
1863 struct intel_engine_cs *engine;
1864 enum intel_engine_id id;
1868 * Check that cumulative context runtime as stored in the pphwsp[16]
1869 * is monotonic.
1872 for_each_engine(engine, gt, id) {
1873 err = __live_pphwsp_runtime(engine);
1878 if (igt_flush_test(gt->i915))
1884 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1886 static const struct i915_subtest tests[] = {
1887 SUBTEST(live_lrc_layout),
1888 SUBTEST(live_lrc_fixed),
1889 SUBTEST(live_lrc_state),
1890 SUBTEST(live_lrc_gpr),
1891 SUBTEST(live_lrc_isolation),
1892 SUBTEST(live_lrc_timestamp),
1893 SUBTEST(live_lrc_garbage),
1894 SUBTEST(live_pphwsp_runtime),
1895 SUBTEST(live_lrc_indirect_ctx_bb),
1898 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1901 return intel_gt_live_subtests(tests, to_gt(i915));