/*
 * Copyright (c) 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <sys/wait.h>

#include "i915_drm.h"
#include "drmtest.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_gpu_tools.h"
#include "rendercopy.h"
#include "igt_debugfs.h"
#define RS_NO_ERROR      0
#define RS_BATCH_ACTIVE  (1 << 0)
#define RS_BATCH_PENDING (1 << 1)
#define RS_UNKNOWN       (1 << 2)
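
/*
 * Local copies of the reset-stats and context ioctl interfaces, so the test
 * still builds against a libdrm that predates these definitions. The layouts
 * mirror the kernel's drm_i915_reset_stats, drm_i915_gem_context_create and
 * drm_i915_gem_context_destroy uapi structs.
 */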
struct local_drm_i915_reset_stats {
	__u32 ctx_id;
	__u32 flags;
	__u32 reset_count;
	__u32 batch_active;
	__u32 batch_pending;
	__u32 pad;
};
struct local_drm_i915_gem_context_create {
	__u32 ctx_id;
	__u32 pad;
};
struct local_drm_i915_gem_context_destroy {
	__u32 ctx_id;
	__u32 pad;
};
#define CONTEXT_CREATE_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2d, struct local_drm_i915_gem_context_create)
#define CONTEXT_DESTROY_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2e, struct local_drm_i915_gem_context_destroy)
#define GET_RESET_STATS_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x32, struct local_drm_i915_reset_stats)

static igt_debugfs_t dfs;
static uint32_t context_create(int fd)
{
	struct local_drm_i915_gem_context_create create;
	int ret;

	/* Seed with garbage so we notice if the kernel fails to fill it in. */
	create.ctx_id = rand();
	create.pad = rand();

	ret = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
	igt_assert(ret == 0);

	return create.ctx_id;
}
static int context_destroy(int fd, uint32_t ctx_id)
{
	int ret;
	struct local_drm_i915_gem_context_destroy destroy;

	destroy.ctx_id = ctx_id;
	destroy.pad = rand();

	ret = drmIoctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
	if (ret != 0)
		return -errno;

	return 0;
}
static int gem_reset_stats(int fd, int ctx_id,
			   struct local_drm_i915_reset_stats *rs)
{
	int ret;

	rs->ctx_id = ctx_id;
	rs->flags = 0;
	/* Seed the output fields so a kernel that fails to write them
	 * is caught by the sanity checks in the callers. */
	rs->reset_count = rand();
	rs->batch_active = rand();
	rs->batch_pending = rand();
	rs->pad = 0;

	do {
		ret = ioctl(fd, GET_RESET_STATS_IOCTL, rs);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	if (ret < 0)
		return -errno;

	return 0;
}
static int gem_reset_status(int fd, int ctx_id)
{
	int ret;
	struct local_drm_i915_reset_stats rs;

	ret = gem_reset_stats(fd, ctx_id, &rs);
	if (ret)
		return ret;

	if (rs.batch_active)
		return RS_BATCH_ACTIVE;
	if (rs.batch_pending)
		return RS_BATCH_PENDING;

	return RS_NO_ERROR;
}
static int gem_exec(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
	int ret;

	ret = ioctl(fd,
		    DRM_IOCTL_I915_GEM_EXECBUFFER2,
		    execbuf);
	if (ret < 0)
		return -errno;

	return 0;
}
static int exec_valid(int fd, int ctx)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec;
	int ret;

	uint32_t buf[2] = { MI_BATCH_BUFFER_END, 0 };

	exec.handle = gem_create(fd, 4096);
	gem_write(fd, exec.handle, 0, buf, sizeof(buf));
	exec.relocation_count = 0;
	exec.relocs_ptr = 0;
	exec.alignment = 0;
	exec.offset = 0;
	exec.flags = 0;
	exec.rsvd1 = 0;
	exec.rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)&exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = sizeof(buf);
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = 0;
	i915_execbuffer2_set_context_id(execbuf, ctx);
	execbuf.rsvd2 = 0;

	ret = gem_exec(fd, &execbuf);
	if (ret < 0)
		return ret;

	return exec.handle;
}
static void stop_rings(void)
{
	int fd;

	fd = igt_debugfs_open(&dfs, "i915_ring_stop", O_WRONLY);
	igt_assert(fd >= 0);

	/* Writing 0xff stops all rings, so any in-flight batch is
	 * eventually declared hung by the hangcheck timer. */
	igt_assert(write(fd, "0xff", 4) == 4);
	close(fd);
}
#define BUFSIZE (4 * 1024)
#define ITEMS   (BUFSIZE >> 2)
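
/*
 * Hang injection: submit a valid batch first to discover where the bo sits
 * in the GTT, then rewrite it so that an MI_BATCH_BUFFER_START at a random
 * offset branches back into the same buffer, and submit it again. The GPU
 * spins in that loop until hangcheck declares the batch hung and resets the
 * ring. Returns the bo handle so callers can sync on and close it.
 */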
static int inject_hang(int fd, int ctx)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec;
	uint64_t gtt_off;
	uint32_t *buf;
	int roff, i;
	unsigned cmd_len = 2;

	/* gen8+ uses a 3 dword MI_BATCH_BUFFER_START (48 bit addresses) */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		cmd_len = 3;

	buf = malloc(BUFSIZE);
	igt_assert(buf != NULL);

	buf[0] = MI_BATCH_BUFFER_END;
	buf[1] = MI_NOOP;

	exec.handle = gem_create(fd, BUFSIZE);
	gem_write(fd, exec.handle, 0, buf, BUFSIZE);
	exec.relocation_count = 0;
	exec.relocs_ptr = 0;
	exec.alignment = 0;
	exec.offset = 0;
	exec.flags = 0;
	exec.rsvd1 = 0;
	exec.rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)&exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = BUFSIZE;
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = 0;
	i915_execbuffer2_set_context_id(execbuf, ctx);
	execbuf.rsvd2 = 0;

	igt_assert(gem_exec(fd, &execbuf) == 0);

	gtt_off = exec.offset;

	for (i = 0; i < ITEMS; i++)
		buf[i] = MI_NOOP;

	roff = random() % (ITEMS - cmd_len);
	buf[roff] = MI_BATCH_BUFFER_START | (cmd_len - 2);
	buf[roff + 1] = (gtt_off & 0xfffffffc) + (roff << 2);
	if (cmd_len == 3)
		/* Upper dword of the branch target. Masking without the
		 * shift would always truncate to zero in a u32. */
		buf[roff + 2] = (gtt_off & 0xffffffff00000000ull) >> 32;

#ifdef VERBOSE
	printf("loop injected at 0x%lx (off 0x%x, bo_start 0x%lx, bo_end 0x%lx)\n",
	       (long unsigned int)((roff << 2) + gtt_off),
	       roff << 2, (long unsigned int)gtt_off,
	       (long unsigned int)(gtt_off + BUFSIZE - 1));
#endif
	gem_write(fd, exec.handle, 0, buf, BUFSIZE);

	exec.relocation_count = 0;
	exec.relocs_ptr = 0;
	exec.alignment = 0;
	exec.offset = 0;
	exec.flags = 0;
	exec.rsvd1 = 0;
	exec.rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)&exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = BUFSIZE;
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = 0;
	i915_execbuffer2_set_context_id(execbuf, ctx);
	execbuf.rsvd2 = 0;

	igt_assert(gem_exec(fd, &execbuf) == 0);

	/* The bo must not have moved, or our loop target is garbage. */
	igt_assert(gtt_off == exec.offset);

	free(buf);

	return exec.handle;
}
static int _assert_reset_status(int fd, int ctx, int status)
{
	int rs;

	rs = gem_reset_status(fd, ctx);
	if (rs < 0) {
		printf("reset status for %d ctx %d returned %d\n",
		       fd, ctx, rs);
		return rs;
	}

	if (rs != status) {
		printf("%d:%d reset status %d differs from assumed %d\n",
		       fd, ctx, rs, status);
		return 1;
	}

	return 0;
}

#define assert_reset_status(fd, ctx, status) \
	igt_assert(_assert_reset_status(fd, ctx, status) == 0)
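
/* Upper bounds for the multi-client tests below; the exact values are
 * assumptions, the subtests only ever use 4 fds and 4 contexts. */
#define MAX_FD 8
#define MAX_CTX 100

/*
 * Open num_fds drm clients and submit one batch on each; the client at
 * hang_index (if >= 0) gets the hanging batch. After syncing, clients before
 * the hang must read RS_NO_ERROR, the guilty one RS_BATCH_ACTIVE and the
 * later ones RS_BATCH_PENDING.
 */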
static void test_rs(int num_fds, int hang_index, int rs_assumed_no_hang)
{
	int i;
	int fd[MAX_FD];
	int h[MAX_FD];

	igt_assert(num_fds <= MAX_FD);
	igt_assert(hang_index < MAX_FD);

	for (i = 0; i < num_fds; i++) {
		fd[i] = drm_open_any();
		igt_assert(fd[i] >= 0);
	}

	for (i = 0; i < num_fds; i++)
		assert_reset_status(fd[i], 0, RS_NO_ERROR);

	for (i = 0; i < num_fds; i++) {
		if (i == hang_index)
			h[i] = inject_hang(fd[i], 0);
		else
			h[i] = exec_valid(fd[i], 0);
	}

	gem_sync(fd[num_fds - 1], h[num_fds - 1]);

	for (i = 0; i < num_fds; i++) {
		if (hang_index < 0) {
			assert_reset_status(fd[i], 0, rs_assumed_no_hang);
			continue;
		}

		if (i < hang_index)
			assert_reset_status(fd[i], 0, RS_NO_ERROR);
		if (i == hang_index)
			assert_reset_status(fd[i], 0, RS_BATCH_ACTIVE);
		if (i > hang_index)
			assert_reset_status(fd[i], 0, RS_BATCH_PENDING);
	}

	for (i = 0; i < num_fds; i++) {
		gem_close(fd[i], h[i]);
		close(fd[i]);
	}
}
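
/*
 * Same idea as test_rs, but with num_ctx contexts on each fd: only the
 * context that submitted the hang may report RS_BATCH_ACTIVE; contexts
 * queued after it report RS_BATCH_PENDING and everything else stays clean.
 */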
static void test_rs_ctx(int num_fds, int num_ctx, int hang_index,
			int hang_context)
{
	int i, j;
	int fd[MAX_FD];
	int h[MAX_FD][MAX_CTX];
	int ctx[MAX_FD][MAX_CTX];

	igt_assert(num_fds <= MAX_FD);
	igt_assert(hang_index < MAX_FD);
	igt_assert(num_ctx <= MAX_CTX);
	igt_assert(hang_context < MAX_CTX);

	test_rs(num_fds, -1, RS_NO_ERROR);

	for (i = 0; i < num_fds; i++) {
		fd[i] = drm_open_any();
		igt_assert(fd[i] >= 0);
		assert_reset_status(fd[i], 0, RS_NO_ERROR);

		for (j = 0; j < num_ctx; j++)
			ctx[i][j] = context_create(fd[i]);

		assert_reset_status(fd[i], 0, RS_NO_ERROR);
	}

	for (i = 0; i < num_fds; i++) {
		assert_reset_status(fd[i], 0, RS_NO_ERROR);

		for (j = 0; j < num_ctx; j++)
			assert_reset_status(fd[i], ctx[i][j], RS_NO_ERROR);

		assert_reset_status(fd[i], 0, RS_NO_ERROR);
	}

	for (i = 0; i < num_fds; i++) {
		for (j = 0; j < num_ctx; j++) {
			if (i == hang_index && j == hang_context)
				h[i][j] = inject_hang(fd[i], ctx[i][j]);
			else
				h[i][j] = exec_valid(fd[i], ctx[i][j]);
		}
	}

	/* Sync on the last batch handle, not the context id. */
	gem_sync(fd[num_fds - 1], h[num_fds - 1][num_ctx - 1]);

	for (i = 0; i < num_fds; i++)
		assert_reset_status(fd[i], 0, RS_NO_ERROR);

	for (i = 0; i < num_fds; i++) {
		for (j = 0; j < num_ctx; j++) {
			if (i < hang_index)
				assert_reset_status(fd[i], ctx[i][j],
						    RS_NO_ERROR);
			if (i == hang_index && j < hang_context)
				assert_reset_status(fd[i], ctx[i][j],
						    RS_NO_ERROR);
			if (i == hang_index && j == hang_context)
				assert_reset_status(fd[i], ctx[i][j],
						    RS_BATCH_ACTIVE);
			if (i == hang_index && j > hang_context)
				assert_reset_status(fd[i], ctx[i][j],
						    RS_BATCH_PENDING);
			if (i > hang_index)
				assert_reset_status(fd[i], ctx[i][j],
						    RS_BATCH_PENDING);
		}
	}

	for (i = 0; i < num_fds; i++) {
		for (j = 0; j < num_ctx; j++) {
			gem_close(fd[i], h[i][j]);
			igt_assert(context_destroy(fd[i], ctx[i][j]) == 0);
		}

		assert_reset_status(fd[i], 0, RS_NO_ERROR);
		close(fd[i]);
	}
}
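
/*
 * Repeatedly hang from one client until the kernel bans it: once banned,
 * further execbufs from the guilty client must fail with -EIO, while an
 * innocent client keeps working and only ever sees pending batches.
 */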
static void test_ban(void)
{
	int h1,h2,h3,h4,h5,h6,h7;
	int fd_bad, fd_good;
	int retry = 10;
	int active_count = 0, pending_count = 0;
	struct local_drm_i915_reset_stats rs_bad, rs_good;

	fd_bad = drm_open_any();
	igt_assert(fd_bad >= 0);
	fd_good = drm_open_any();
	igt_assert(fd_good >= 0);

	assert_reset_status(fd_bad, 0, RS_NO_ERROR);
	assert_reset_status(fd_good, 0, RS_NO_ERROR);

	h1 = exec_valid(fd_bad, 0);
	igt_assert(h1 >= 0);
	h5 = exec_valid(fd_good, 0);
	igt_assert(h5 >= 0);

	assert_reset_status(fd_bad, 0, RS_NO_ERROR);
	assert_reset_status(fd_good, 0, RS_NO_ERROR);

	h2 = inject_hang(fd_bad, 0);
	igt_assert(h2 >= 0);
	active_count++;
	/* Second hang will be pending for this */
	pending_count++;

	h6 = exec_valid(fd_good, 0);
	h7 = exec_valid(fd_good, 0);

	while (retry--) {
		h3 = inject_hang(fd_bad, 0);
		igt_assert(h3 >= 0);
		gem_sync(fd_bad, h3);
		active_count++;
		/* This second hang will count as pending */
		assert_reset_status(fd_bad, 0, RS_BATCH_ACTIVE);

		h4 = exec_valid(fd_bad, 0);
		if (h4 == -EIO) {
			gem_close(fd_bad, h3);
			break;
		}

		/* Should not happen often, but sometimes the hang is declared
		 * too slowly because of our way of faking a hang with a loop */
		igt_assert(h4 >= 0);
		gem_close(fd_bad, h3);
		gem_close(fd_bad, h4);

		printf("retrying for ban (%d)\n", retry);
	}

	igt_assert(h4 == -EIO);
	assert_reset_status(fd_bad, 0, RS_BATCH_ACTIVE);

	gem_sync(fd_good, h7);
	assert_reset_status(fd_good, 0, RS_BATCH_PENDING);

	igt_assert(gem_reset_stats(fd_good, 0, &rs_good) == 0);
	igt_assert(gem_reset_stats(fd_bad, 0, &rs_bad) == 0);

	igt_assert(rs_bad.batch_active == active_count);
	igt_assert(rs_bad.batch_pending == pending_count);
	igt_assert(rs_good.batch_active == 0);
	igt_assert(rs_good.batch_pending == 2);

	gem_close(fd_bad, h1);
	gem_close(fd_bad, h2);
	gem_close(fd_good, h6);
	gem_close(fd_good, h7);

	h1 = exec_valid(fd_good, 0);
	igt_assert(h1 >= 0);
	gem_close(fd_good, h1);

	close(fd_bad);
	close(fd_good);

	/* The fds are closed, so reset queries on them must now fail. */
	igt_assert(gem_reset_status(fd_bad, 0) < 0);
	igt_assert(gem_reset_status(fd_good, 0) < 0);
}
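
/*
 * Same ban scenario as test_ban, but the guilty and innocent parties are two
 * contexts on a single fd, and a banned context must also refuse new work.
 */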
static void test_ban_ctx(void)
{
	int h1,h2,h3,h4,h5,h6,h7;
	int ctx_good, ctx_bad;
	int fd;
	int retry = 10;
	int active_count = 0, pending_count = 0;
	struct local_drm_i915_reset_stats rs_bad, rs_good;

	fd = drm_open_any();
	igt_assert(fd >= 0);

	assert_reset_status(fd, 0, RS_NO_ERROR);

	ctx_good = context_create(fd);
	ctx_bad = context_create(fd);

	assert_reset_status(fd, 0, RS_NO_ERROR);
	assert_reset_status(fd, ctx_good, RS_NO_ERROR);
	assert_reset_status(fd, ctx_bad, RS_NO_ERROR);

	h1 = exec_valid(fd, ctx_bad);
	igt_assert(h1 >= 0);
	h5 = exec_valid(fd, ctx_good);
	igt_assert(h5 >= 0);

	assert_reset_status(fd, ctx_good, RS_NO_ERROR);
	assert_reset_status(fd, ctx_bad, RS_NO_ERROR);

	h2 = inject_hang(fd, ctx_bad);
	igt_assert(h2 >= 0);
	active_count++;
	/* Second hang will be pending for this */
	pending_count++;

	h6 = exec_valid(fd, ctx_good);
	h7 = exec_valid(fd, ctx_good);

	while (retry--) {
		h3 = inject_hang(fd, ctx_bad);
		igt_assert(h3 >= 0);
		gem_sync(fd, h3);
		active_count++;
		/* This second hang will count as pending */
		assert_reset_status(fd, ctx_bad, RS_BATCH_ACTIVE);

		h4 = exec_valid(fd, ctx_bad);
		if (h4 == -EIO) {
			gem_close(fd, h3);
			break;
		}

		/* Should not happen often, but sometimes the hang is declared
		 * too slowly because of our way of faking a hang with a loop */
		igt_assert(h4 >= 0);
		gem_close(fd, h3);
		gem_close(fd, h4);

		printf("retrying for ban (%d)\n", retry);
	}

	igt_assert(h4 == -EIO);
	assert_reset_status(fd, ctx_bad, RS_BATCH_ACTIVE);

	gem_sync(fd, h7);
	assert_reset_status(fd, ctx_good, RS_BATCH_PENDING);

	igt_assert(gem_reset_stats(fd, ctx_good, &rs_good) == 0);
	igt_assert(gem_reset_stats(fd, ctx_bad, &rs_bad) == 0);

	igt_assert(rs_bad.batch_active == active_count);
	igt_assert(rs_bad.batch_pending == pending_count);
	igt_assert(rs_good.batch_active == 0);
	igt_assert(rs_good.batch_pending == 2);

	gem_close(fd, h1);
	gem_close(fd, h2);
	gem_close(fd, h6);
	gem_close(fd, h7);

	h1 = exec_valid(fd, ctx_good);
	igt_assert(h1 >= 0);
	gem_close(fd, h1);

	igt_assert(context_destroy(fd, ctx_good) == 0);
	igt_assert(context_destroy(fd, ctx_bad) == 0);
	igt_assert(gem_reset_status(fd, ctx_good) < 0);
	igt_assert(gem_reset_status(fd, ctx_bad) < 0);
	igt_assert(exec_valid(fd, ctx_good) < 0);
	igt_assert(exec_valid(fd, ctx_bad) < 0);

	close(fd);
}
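
/*
 * A hang in one context must not leak into the reset status of a completely
 * unrelated context on another fd.
 */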
static void test_unrelated_ctx(void)
{
	int h1,h2;
	int fd1,fd2;
	int ctx_guilty, ctx_unrelated;

	fd1 = drm_open_any();
	fd2 = drm_open_any();
	assert_reset_status(fd1, 0, RS_NO_ERROR);
	assert_reset_status(fd2, 0, RS_NO_ERROR);
	ctx_guilty = context_create(fd1);
	ctx_unrelated = context_create(fd2);

	assert_reset_status(fd1, ctx_guilty, RS_NO_ERROR);
	assert_reset_status(fd2, ctx_unrelated, RS_NO_ERROR);

	h1 = inject_hang(fd1, ctx_guilty);
	igt_assert(h1 >= 0);
	gem_sync(fd1, h1);

	assert_reset_status(fd1, ctx_guilty, RS_BATCH_ACTIVE);
	assert_reset_status(fd2, ctx_unrelated, RS_NO_ERROR);

	h2 = exec_valid(fd2, ctx_unrelated);
	igt_assert(h2 >= 0);
	gem_sync(fd2, h2);

	assert_reset_status(fd1, ctx_guilty, RS_BATCH_ACTIVE);
	assert_reset_status(fd2, ctx_unrelated, RS_NO_ERROR);

	gem_close(fd1, h1);
	gem_close(fd2, h2);

	igt_assert(context_destroy(fd1, ctx_guilty) == 0);
	igt_assert(context_destroy(fd2, ctx_unrelated) == 0);

	close(fd1);
	close(fd2);
}
static int get_reset_count(int fd, int ctx)
{
	int ret;
	struct local_drm_i915_reset_stats rs;

	ret = gem_reset_stats(fd, ctx, &rs);
	if (ret)
		return ret;

	return rs.reset_count;
}
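
/*
 * Destroying a context that still has a hanging batch in flight must work,
 * and destroying it a second time must fail with -ENOENT.
 */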
static void test_close_pending_ctx(void)
{
	int fd, h;
	uint32_t ctx;

	fd = drm_open_any();
	igt_assert(fd >= 0);
	ctx = context_create(fd);

	assert_reset_status(fd, ctx, RS_NO_ERROR);

	h = inject_hang(fd, ctx);
	igt_assert(h >= 0);
	igt_assert(context_destroy(fd, ctx) == 0);
	igt_assert(context_destroy(fd, ctx) == -ENOENT);

	gem_close(fd, h);
	close(fd);
}

/* As above, but close the fd itself while the hang is still pending. */
static void test_close_pending(void)
{
	int fd, h;

	fd = drm_open_any();
	igt_assert(fd >= 0);

	assert_reset_status(fd, 0, RS_NO_ERROR);

	h = inject_hang(fd, 0);
	igt_assert(h >= 0);

	gem_close(fd, h);
	close(fd);
}
#define LOCAL_I915_EXEC_VEBOX (4 << 0)

static const struct target_ring {
	uint32_t exec;
	bool (*avail)(int fd);
} rings[] = {
	{ I915_EXEC_RENDER, 0 },
	{ I915_EXEC_BLT, gem_has_blt },
	{ I915_EXEC_BSD, gem_has_bsd },
	{ LOCAL_I915_EXEC_VEBOX, gem_has_vebox },
};

#define NUM_RINGS (sizeof(rings)/sizeof(struct target_ring))
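
/*
 * Submit the same noop batch on every available ring, in either table order
 * or reverse, so the one batch_obj ends up referenced by all ring lists.
 */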
static void exec_noop_on_each_ring(int fd, const bool reverse)
{
	uint32_t batch[2] = {MI_BATCH_BUFFER_END, 0};
	uint32_t handle;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[1];

	handle = gem_create(fd, 4096);
	gem_write(fd, handle, 0, batch, sizeof(batch));

	exec[0].handle = handle;
	exec[0].relocation_count = 0;
	exec[0].relocs_ptr = 0;
	exec[0].alignment = 0;
	exec[0].offset = 0;
	exec[0].flags = 0;
	exec[0].rsvd1 = 0;
	exec[0].rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = sizeof(batch);
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = 0;
	i915_execbuffer2_set_context_id(execbuf, 0);
	execbuf.rsvd2 = 0;

	for (unsigned i = 0; i < NUM_RINGS; i++) {
		const struct target_ring *ring;

		ring = reverse ? &rings[NUM_RINGS - 1 - i] : &rings[i];

		if (!ring->avail || ring->avail(fd)) {
			execbuf.flags = ring->exec;
			do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
		}
	}

	gem_sync(fd, handle);
	gem_close(fd, handle);
}
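
/*
 * Inject a hang, then have a forked child queue the same noop batch on every
 * ring before being SIGKILLed. The parent closes its fd while the hang is
 * still pending, so the eventual reset exercises batch_obj refcounting
 * against the dying child's ring lists.
 */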
static void test_close_pending_fork(const bool reverse)
{
	int pid;
	int fd, h;

	fd = drm_open_any();
	igt_assert(fd >= 0);

	assert_reset_status(fd, 0, RS_NO_ERROR);

	h = inject_hang(fd, 0);
	igt_assert(h >= 0);

	sleep(1);

	/* Avoid helpers as we need to kill the child
	 * without any extra signal handling on behalf of
	 * lib/drmtest.c
	 */
	pid = fork();
	if (pid == 0) {
		const int fd2 = drm_open_any();
		igt_assert(fd2 >= 0);

		/* The crucial component is that we schedule the same noop batch
		 * on each ring. This exercises batch_obj reference counting,
		 * when the gpu is reset and the ring lists are cleared.
		 */
		exec_noop_on_each_ring(fd2, reverse);
		exit(0);
	} else {
		igt_assert(pid > 0);
		sleep(1);

		/* Kill the child to reduce refcounts on
		   batch_objs */
		kill(pid, SIGKILL);

		gem_close(fd, h);
		close(fd);

		/* Then we just wait on the hang to happen */
		fd = drm_open_any();
		igt_assert(fd >= 0);

		h = exec_valid(fd, 0);
		igt_assert(h >= 0);

		gem_sync(fd, h);
		gem_close(fd, h);
		close(fd);
	}
}
static void drop_root(void)
{
	igt_assert(getuid() == 0);

	igt_assert(setgid(2) == 0);
	igt_assert(setuid(2) == 0);

	igt_assert(getgid() == 2);
	igt_assert(getuid() == 2);
}
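
/*
 * The per-context reset count must increment exactly once across a hang,
 * and reading it without root privilege must fail with -EPERM.
 */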
static void test_reset_count(const bool create_ctx)
{
	int fd, h, ctx;
	long c1, c2;

	fd = drm_open_any();
	igt_assert(fd >= 0);
	if (create_ctx)
		ctx = context_create(fd);
	else
		ctx = 0;

	assert_reset_status(fd, ctx, RS_NO_ERROR);

	c1 = get_reset_count(fd, ctx);
	igt_assert(c1 >= 0);

	h = inject_hang(fd, ctx);
	igt_assert(h >= 0);
	gem_sync(fd, h);

	assert_reset_status(fd, ctx, RS_BATCH_ACTIVE);
	c2 = get_reset_count(fd, ctx);
	igt_assert(c2 >= 0);
	igt_assert(c2 == (c1 + 1));

	switch (fork()) {
	case -1:
		igt_assert(0);
		break;
	case 0:
		/* An unprivileged client may not read the reset count. */
		drop_root();

		c2 = get_reset_count(fd, ctx);
		igt_assert(c2 == -EPERM);

		exit(0);
		break;
	default:
		wait(NULL);
	}

	gem_close(fd, h);

	if (create_ctx)
		context_destroy(fd, ctx);

	close(fd);
}
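
/*
 * Issue GET_RESET_STATS with caller-controlled flags and pad fields; the
 * kernel must reject any nonzero value in either with -EINVAL.
 */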
static int _test_params(int fd, int ctx, uint32_t flags, uint32_t pad)
{
	struct local_drm_i915_reset_stats rs;
	int ret;

	rs.ctx_id = ctx;
	rs.flags = flags;
	rs.reset_count = rand();
	rs.batch_active = rand();
	rs.batch_pending = rand();
	rs.pad = pad;

	do {
		ret = ioctl(fd, GET_RESET_STATS_IOCTL, &rs);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	if (ret < 0)
		return -errno;

	return 0;
}
typedef enum { root = 0, user } cap_t;

static void test_param_ctx(const int fd, const int ctx, const cap_t cap)
{
	const uint32_t bad = rand() + 1;

	if (cap == root)
		igt_assert(_test_params(fd, ctx, 0, 0) == 0);
	else
		igt_assert(_test_params(fd, ctx, 0, 0) == -EPERM);

	igt_assert(_test_params(fd, ctx, 0, bad) == -EINVAL);
	igt_assert(_test_params(fd, ctx, bad, 0) == -EINVAL);
	igt_assert(_test_params(fd, ctx, bad, bad) == -EINVAL);
}
static void check_params(const int fd, const int ctx, cap_t cap)
{
	igt_assert(ioctl(fd, GET_RESET_STATS_IOCTL, 0) == -1);
	igt_assert(_test_params(fd, 0xbadbad, 0, 0) == -ENOENT);

	test_param_ctx(fd, 0, cap);
	test_param_ctx(fd, ctx, cap);
}
static void _test_param(const int fd, const int ctx)
{
	check_params(fd, ctx, root);

	igt_fork(child, 1) {
		check_params(fd, ctx, root);

		drop_root();

		check_params(fd, ctx, user);
	}

	check_params(fd, ctx, root);

	igt_waitchildren();
}
static void test_params(void)
{
	int fd, ctx;

	fd = drm_open_any();
	igt_assert(fd >= 0);
	ctx = context_create(fd);

	_test_param(fd, ctx);

	close(fd);
}
igt_main
{
	struct local_drm_i915_gem_context_create create;
	uint32_t devid;
	int fd;
	int ret;

	igt_skip_on_simulation();

	igt_fixture {
		fd = drm_open_any();
		igt_assert(fd >= 0);

		devid = intel_get_drm_devid(fd);
		igt_require_f(intel_gen(devid) >= 4,
			      "Architecture %d too old\n", intel_gen(devid));

		/* Probe for context support before running anything. */
		memset(&create, 0, sizeof(create));
		ret = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
		igt_skip_on_f(ret != 0 && (errno == ENODEV || errno == EINVAL),
			      "Kernel is too old, or contexts not supported: %s\n",
			      strerror(errno));

		igt_assert(igt_debugfs_init(&dfs) == 0);

		close(fd);
	}

	igt_subtest("params")
		test_params();

	igt_subtest("reset-stats")
		test_rs(4, 1, 0);

	igt_subtest("reset-stats-ctx")
		test_rs_ctx(4, 4, 1, 2);

	igt_subtest("ban")
		test_ban();

	igt_subtest("ban-ctx")
		test_ban_ctx();

	igt_subtest("unrelated-ctx")
		test_unrelated_ctx();

	igt_subtest("reset-count")
		test_reset_count(false);

	igt_subtest("reset-count-ctx")
		test_reset_count(true);

	igt_subtest("close-pending")
		test_close_pending();

	igt_subtest("close-pending-ctx")
		test_close_pending_ctx();

	igt_subtest("close-pending-fork") {
		test_close_pending_fork(true);
		test_close_pending_fork(false);
	}
}