2 * Copyright © 2011 Daniel Vetter
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Daniel Vetter <daniel.vetter@ffwll.ch>
26 * Partially based upon gem_tiled_fence_blits.c
29 /** @file gem_stress.c
31 * This is a general gem coherency test. It's designed to eventually replicate
32 * any possible sequence of access patterns. It works by copying a set of tiles
33 * between two sets of backing buffer objects, randomly permuting the assigned
34 * position on each copy operation.
36 * The copy operations are done in tiny portions (to reduce any race windows
37 * for corruptions, hence increasing the chances for observing one) and are
38 * constantly switched between all means to copy stuff (fenced blitter, unfenced
39 * render, mmap, pwrite/read).
41 * After every complete move of a set, the tiling parameters of a buffer are randomly
42 * changed to simulate the effects of libdrm caching.
44 * Buffers are 1mb big to nicely fit into fences on gen2/3. A few are further
45 * split up to test relaxed fencing. Using this to push the average working set
46 * size over the available gtt space forces objects to be mapped as unfenceable
47 * (and as a side-effect tests gtt map/unmap coherency).
49 * In short: designed for maximum evilness.
53 #include <sys/ioctl.h>
65 #include "ioctl_wrappers.h"
67 #include "intel_bufmgr.h"
68 #include "intel_batchbuffer.h"
70 #include "intel_chipset.h"
73 #define CMD_POLY_STIPPLE_OFFSET 0x7906
76 * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
77 * - render copy (to check fence tracking and cache coherency management by the
79 * - multi-threading: probably just a wrapper script to launch multiple
80 * instances + an option to accordingly reduce the working set
81 * - gen6 inter-ring coherency (needs render copy, first)
82 * - variable buffer size
83 * - add an option to fork a second process that randomly sends signals to the
84 * first one (to check consistency of the kernel recovery paths)
87 drm_intel_bufmgr *bufmgr;
88 struct intel_batchbuffer *batch;
93 drm_intel_bo *busy_bo;
95 struct option_struct {
96 unsigned scratch_buf_size;
97 unsigned max_dimension;
111 int check_render_cpyfn;
112 int use_signal_helper;
115 struct option_struct options;
117 #define MAX_BUFS 4096
118 #define SCRATCH_BUF_SIZE 1024*1024
119 #define BUSY_BUF_SIZE (256*4096)
120 #define TILE_BYTES(size) ((size)*(size)*sizeof(uint32_t))
122 static struct igt_buf buffers[2][MAX_BUFS];
123 /* tile i is at logical position tile_permutation[i] */
124 static unsigned *tile_permutation;
125 static unsigned num_buffers = 0;
126 static unsigned current_set = 0;
127 static unsigned target_set = 0;
128 static unsigned num_total_tiles = 0;
131 static int gpu_busy_load = 10;
135 unsigned max_failed_reads;
138 static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
140 igt_assert(tile < buf->num_tiles);
141 *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
142 *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
145 static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
146 unsigned src_x, unsigned src_y, unsigned w, unsigned h,
147 drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
148 unsigned dst_x, unsigned dst_y)
150 uint32_t cmd_bits = 0;
152 if (IS_965(devid) && src_tiling) {
154 cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
157 if (IS_965(devid) && dst_tiling) {
159 cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
162 /* copy lower half to upper half */
163 BLIT_COPY_BATCH_START(devid, cmd_bits);
164 OUT_BATCH((3 << 24) | /* 32 bits */
165 (0xcc << 16) | /* copy ROP */
167 OUT_BATCH(dst_y << 16 | dst_x);
168 OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
169 OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
170 BLIT_RELOC_UDW(devid);
171 OUT_BATCH(src_y << 16 | src_x);
172 OUT_BATCH(src_pitch);
173 OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
174 BLIT_RELOC_UDW(devid);
177 if (IS_GEN6(devid) || IS_GEN7(devid)) {
179 OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
186 /* All this gem trashing wastes too much cpu time, so give the gpu something to
187 * do to increase changes for races. */
188 static void keep_gpu_busy(void)
192 tmp = 1 << gpu_busy_load;
193 igt_assert(tmp <= 1024);
195 emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
196 busy_bo, 0, 4096, 0, 128);
199 static void set_to_cpu_domain(struct igt_buf *buf, int writing)
201 gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
202 writing ? I915_GEM_DOMAIN_CPU : 0);
205 static unsigned int copyfunc_seq = 0;
206 static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
207 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
208 unsigned logical_tile_no);
210 /* stride, x, y in units of uint32_t! */
211 static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
212 uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
213 unsigned logical_tile_no)
218 for (i = 0; i < options.tile_size; i++) {
219 for (j = 0; j < options.tile_size; j++) {
220 unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
221 unsigned src_ofs = src_x + j + src_stride * (src_y + i);
222 unsigned expect = logical_tile_no*options.tile_size*options.tile_size
223 + i*options.tile_size + j;
224 uint32_t tmp = src[src_ofs];
226 igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
227 igt_fail_on(options.trace_tile >= 0 && options.fail);
230 /* when not aborting, correct any errors */
231 dst[dst_ofs] = expect;
234 igt_fail_on(failed && options.fail);
236 if (failed > stats.max_failed_reads)
237 stats.max_failed_reads = failed;
242 static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
243 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
244 unsigned logical_tile_no)
246 igt_assert(batch->ptr == batch->buffer);
248 if (options.ducttape)
249 drm_intel_bo_wait_rendering(dst->bo);
251 if (options.use_cpu_maps) {
252 set_to_cpu_domain(src, 0);
253 set_to_cpu_domain(dst, 1);
256 cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
257 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
261 static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
262 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
263 unsigned logical_tile_no)
265 uint32_t tmp_tile[options.tile_size*options.tile_size];
268 igt_assert(batch->ptr == batch->buffer);
270 if (options.ducttape)
271 drm_intel_bo_wait_rendering(dst->bo);
273 if (src->tiling == I915_TILING_NONE) {
274 for (i = 0; i < options.tile_size; i++) {
275 unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
276 drm_intel_bo_get_subdata(src->bo, ofs,
277 options.tile_size*sizeof(uint32_t),
278 tmp_tile + options.tile_size*i);
281 if (options.use_cpu_maps)
282 set_to_cpu_domain(src, 0);
284 cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
285 tmp_tile, options.tile_size, 0, 0, logical_tile_no);
288 if (dst->tiling == I915_TILING_NONE) {
289 for (i = 0; i < options.tile_size; i++) {
290 unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
291 drm_intel_bo_subdata(dst->bo, ofs,
292 options.tile_size*sizeof(uint32_t),
293 tmp_tile + options.tile_size*i);
296 if (options.use_cpu_maps)
297 set_to_cpu_domain(dst, 1);
299 cpucpy2d(tmp_tile, options.tile_size, 0, 0,
300 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
305 static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
306 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
307 unsigned logical_tile_no)
309 static unsigned keep_gpu_busy_counter = 0;
311 /* check both edges of the fence usage */
312 if (keep_gpu_busy_counter & 1 && !fence_storm)
315 emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
316 options.tile_size, options.tile_size,
317 dst->bo, dst->tiling, dst->stride, dst_x, dst_y);
319 if (!(keep_gpu_busy_counter & 1) && !fence_storm)
322 keep_gpu_busy_counter++;
329 if (fence_storm <= 1) {
331 intel_batchbuffer_flush(batch);
335 static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
336 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
337 unsigned logical_tile_no)
339 static unsigned keep_gpu_busy_counter = 0;
340 igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);
342 /* check both edges of the fence usage */
343 if (keep_gpu_busy_counter & 1)
348 * Flush outstanding blts so that they don't end up on
349 * the render ring when that's not allowed (gen6+).
351 intel_batchbuffer_flush(batch);
352 rendercopy(batch, NULL, src, src_x, src_y,
353 options.tile_size, options.tile_size,
356 blitter_copyfunc(src, src_x, src_y,
359 if (!(keep_gpu_busy_counter & 1))
362 keep_gpu_busy_counter++;
363 intel_batchbuffer_flush(batch);
366 static void next_copyfunc(int tile)
369 if (tile == options.trace_tile)
370 igt_info(" using fence storm\n");
374 if (copyfunc_seq % 61 == 0
375 && options.forced_tiling != I915_TILING_NONE) {
376 if (tile == options.trace_tile)
377 igt_info(" using fence storm\n");
378 fence_storm = num_fences;
379 copyfunc = blitter_copyfunc;
380 } else if (copyfunc_seq % 17 == 0) {
381 if (tile == options.trace_tile)
382 igt_info(" using cpu\n");
383 copyfunc = cpu_copyfunc;
384 } else if (copyfunc_seq % 19 == 0) {
385 if (tile == options.trace_tile)
386 igt_info(" using prw\n");
387 copyfunc = prw_copyfunc;
388 } else if (copyfunc_seq % 3 == 0 && options.use_render) {
389 if (tile == options.trace_tile)
390 igt_info(" using render\n");
391 copyfunc = render_copyfunc;
392 } else if (options.use_blt){
393 if (tile == options.trace_tile)
394 igt_info(" using blitter\n");
395 copyfunc = blitter_copyfunc;
396 } else if (options.use_render){
397 if (tile == options.trace_tile)
398 igt_info(" using render\n");
399 copyfunc = render_copyfunc;
401 copyfunc = cpu_copyfunc;
407 static void fan_out(void)
409 uint32_t tmp_tile[options.tile_size*options.tile_size];
412 unsigned tile, buf_idx, x, y;
414 for (i = 0; i < num_total_tiles; i++) {
416 buf_idx = tile / options.tiles_per_buf;
417 tile %= options.tiles_per_buf;
419 tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
421 for (k = 0; k < options.tile_size*options.tile_size; k++)
424 if (options.use_cpu_maps)
425 set_to_cpu_domain(&buffers[current_set][buf_idx], 1);
427 cpucpy2d(tmp_tile, options.tile_size, 0, 0,
428 buffers[current_set][buf_idx].data,
429 buffers[current_set][buf_idx].stride / sizeof(uint32_t),
433 for (i = 0; i < num_total_tiles; i++)
434 tile_permutation[i] = i;
437 static void fan_in_and_check(void)
439 uint32_t tmp_tile[options.tile_size*options.tile_size];
440 unsigned tile, buf_idx, x, y;
442 for (i = 0; i < num_total_tiles; i++) {
443 tile = tile_permutation[i];
444 buf_idx = tile / options.tiles_per_buf;
445 tile %= options.tiles_per_buf;
447 tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
449 if (options.use_cpu_maps)
450 set_to_cpu_domain(&buffers[current_set][buf_idx], 0);
452 cpucpy2d(buffers[current_set][buf_idx].data,
453 buffers[current_set][buf_idx].stride / sizeof(uint32_t),
455 tmp_tile, options.tile_size, 0, 0,
460 static void sanitize_stride(struct igt_buf *buf)
463 if (igt_buf_height(buf) > options.max_dimension)
464 buf->stride = buf->size / options.max_dimension;
466 if (igt_buf_height(buf) < options.tile_size)
467 buf->stride = buf->size / options.tile_size;
469 if (igt_buf_width(buf) < options.tile_size)
470 buf->stride = options.tile_size * sizeof(uint32_t);
472 igt_assert(buf->stride <= 8192);
473 igt_assert(igt_buf_width(buf) <= options.max_dimension);
474 igt_assert(igt_buf_height(buf) <= options.max_dimension);
476 igt_assert(igt_buf_width(buf) >= options.tile_size);
477 igt_assert(igt_buf_height(buf) >= options.tile_size);
481 static void init_buffer(struct igt_buf *buf, unsigned size)
483 buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
486 buf->tiling = I915_TILING_NONE;
489 sanitize_stride(buf);
492 buf->data = malloc(size);
494 if (options.use_cpu_maps)
495 drm_intel_bo_map(buf->bo, 1);
497 drm_intel_gem_bo_map_gtt(buf->bo);
498 buf->data = buf->bo->virtual;
501 buf->num_tiles = options.tiles_per_buf;
504 static void exchange_buf(void *array, unsigned i, unsigned j)
506 struct igt_buf *buf_arr, tmp;
509 memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
510 memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
511 memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
515 static void init_set(unsigned set)
520 igt_permute_array(buffers[set], num_buffers, exchange_buf);
522 if (current_set == 1 && options.gpu_busy_load == 0) {
524 if (gpu_busy_load > 10)
528 for (i = 0; i < num_buffers; i++) {
535 buffers[set][i].tiling = I915_TILING_X;
537 buffers[set][i].tiling = I915_TILING_NONE;
539 if (options.forced_tiling >= 0)
540 buffers[set][i].tiling = options.forced_tiling;
542 if (buffers[set][i].tiling == I915_TILING_NONE) {
543 /* min 64 byte stride */
545 buffers[set][i].stride = 64 * (1 << r);
546 } else if (IS_GEN2(devid)) {
547 /* min 128 byte stride */
549 buffers[set][i].stride = 128 * (1 << r);
551 /* min 512 byte stride */
553 buffers[set][i].stride = 512 * (1 << r);
556 sanitize_stride(&buffers[set][i]);
558 gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
559 buffers[set][i].tiling,
560 buffers[set][i].stride);
562 if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
563 igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
567 static void exchange_uint(void *array, unsigned i, unsigned j)
569 unsigned *i_arr = array;
577 static void copy_tiles(unsigned *permutation)
579 unsigned src_tile, src_buf_idx, src_x, src_y;
580 unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
581 struct igt_buf *src_buf, *dst_buf;
583 for (i = 0; i < num_total_tiles; i++) {
584 /* tile_permutation is independent of current_permutation, so
585 * abuse it to randomize the order of the src bos */
586 idx = tile_permutation[i];
587 src_buf_idx = idx / options.tiles_per_buf;
588 src_tile = idx % options.tiles_per_buf;
589 src_buf = &buffers[current_set][src_buf_idx];
591 tile2xy(src_buf, src_tile, &src_x, &src_y);
593 dst_buf_idx = permutation[idx] / options.tiles_per_buf;
594 dst_tile = permutation[idx] % options.tiles_per_buf;
595 dst_buf = &buffers[target_set][dst_buf_idx];
597 tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);
599 if (options.trace_tile == i)
600 igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);
603 cpucpy2d(src_buf->data,
604 src_buf->stride / sizeof(uint32_t),
607 dst_buf->stride / sizeof(uint32_t),
613 copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
618 intel_batchbuffer_flush(batch);
621 static void sanitize_tiles_per_buf(void)
623 if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
624 options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
627 static void parse_options(int argc, char **argv)
630 int option_index = 0;
631 static struct option long_options[] = {
632 {"no-hw", 0, 0, 'd'},
633 {"buf-size", 1, 0, 's'},
634 {"gpu-busy-load", 1, 0, 'g'},
635 {"no-signals", 0, 0, 'S'},
636 {"buffer-count", 1, 0, 'c'},
637 {"trace-tile", 1, 0, 't'},
638 {"disable-blt", 0, 0, 'b'},
639 {"disable-render", 0, 0, 'r'},
640 {"untiled", 0, 0, 'u'},
641 {"x-tiled", 0, 0, 'x'},
642 {"use-cpu-maps", 0, 0, 'm'},
643 {"rounds", 1, 0, 'o'},
644 {"no-fail", 0, 0, 'f'},
645 {"tiles-per-buf", 0, 0, 'p'},
646 #define DUCTAPE 0xdead0001
647 {"remove-duct-tape", 0, 0, DUCTAPE},
648 #define TILESZ 0xdead0002
649 {"tile-size", 1, 0, TILESZ},
650 #define CHCK_RENDER 0xdead0003
651 {"check-render-cpyfn", 0, 0, CHCK_RENDER},
655 options.scratch_buf_size = 256*4096;
657 options.use_signal_helper = 1;
658 options.gpu_busy_load = 0;
659 options.num_buffers = 0;
660 options.trace_tile = -1;
661 options.use_render = 1;
663 options.forced_tiling = -1;
664 options.use_cpu_maps = 0;
665 options.total_rounds = 512;
667 options.ducttape = 1;
668 options.tile_size = 16;
669 options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
670 options.check_render_cpyfn = 0;
672 while((c = getopt_long(argc, argv, "ds:g:c:t:rbuxmo:fp:",
673 long_options, &option_index)) != -1) {
677 igt_info("no-hw debug mode\n");
680 options.use_signal_helper = 0;
681 igt_info("disabling that pesky nuisance who keeps interrupting us\n");
685 if (tmp < options.tile_size*8192)
686 igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
687 else if (tmp & (tmp - 1)) {
688 igt_info("scratch buffer size needs to be a power-of-two\n");
690 igt_info("fixed scratch buffer size to %u\n", tmp);
691 options.scratch_buf_size = tmp;
692 sanitize_tiles_per_buf();
697 if (tmp < 0 || tmp > 10)
698 igt_info("gpu busy load needs to be bigger than 0 and smaller than 10\n");
700 igt_info("gpu busy load factor set to %i\n", tmp);
701 gpu_busy_load = options.gpu_busy_load = tmp;
705 options.num_buffers = atoi(optarg);
706 igt_info("buffer count set to %i\n", options.num_buffers);
709 options.trace_tile = atoi(optarg);
710 igt_info("tracing tile %i\n", options.trace_tile);
713 options.use_render = 0;
714 igt_info("disabling render copy\n");
718 igt_info("disabling blt copy\n");
721 options.forced_tiling = I915_TILING_NONE;
722 igt_info("disabling tiling\n");
725 if (options.use_cpu_maps) {
726 igt_info("tiling not possible with cpu maps\n");
728 options.forced_tiling = I915_TILING_X;
729 igt_info("using only X-tiling\n");
733 options.use_cpu_maps = 1;
734 options.forced_tiling = I915_TILING_NONE;
735 igt_info("disabling tiling\n");
738 options.total_rounds = atoi(optarg);
739 igt_info("total rounds %i\n", options.total_rounds);
743 igt_info("not failing when detecting errors\n");
746 options.tiles_per_buf = atoi(optarg);
747 igt_info("tiles per buffer %i\n", options.tiles_per_buf);
750 options.ducttape = 0;
751 igt_info("applying duct-tape\n");
754 options.tile_size = atoi(optarg);
755 sanitize_tiles_per_buf();
756 igt_info("til size %i\n", options.tile_size);
759 options.check_render_cpyfn = 1;
760 igt_info("checking render copy function\n");
763 igt_info("unkown command options\n");
769 igt_info("unkown command options\n");
771 /* actually 32767, according to docs, but that kills our nice pot calculations. */
772 options.max_dimension = 16*1024;
773 if (options.use_render) {
774 if (IS_GEN2(devid) || IS_GEN3(devid))
775 options.max_dimension = 2048;
777 options.max_dimension = 8192;
779 igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);
782 static void init(void)
787 if (options.num_buffers == 0) {
788 tmp = gem_aperture_size(drm_fd);
789 tmp = tmp > 256*(1024*1024) ? 256*(1024*1024) : tmp;
790 num_buffers = 2 * tmp / options.scratch_buf_size / 3;
792 igt_info("using %u buffers\n", num_buffers);
794 num_buffers = options.num_buffers;
796 bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
797 drm_intel_bufmgr_gem_enable_reuse(bufmgr);
798 drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
799 num_fences = gem_available_fences(drm_fd);
800 igt_assert(num_fences > 4);
801 batch = intel_batchbuffer_alloc(bufmgr, devid);
803 busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
804 if (options.forced_tiling >= 0)
805 gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);
807 for (i = 0; i < num_buffers; i++) {
808 init_buffer(&buffers[0][i], options.scratch_buf_size);
809 init_buffer(&buffers[1][i], options.scratch_buf_size);
811 num_total_tiles += buffers[0][i].num_tiles;
815 /* just in case it helps reproducability */
819 static void check_render_copyfunc(void)
821 struct igt_buf src, dst;
825 if (!options.check_render_cpyfn)
828 init_buffer(&src, options.scratch_buf_size);
829 init_buffer(&dst, options.scratch_buf_size);
831 for (pass = 0; pass < 16; pass++) {
832 int sx = random() % (igt_buf_width(&src)-options.tile_size);
833 int sy = random() % (igt_buf_height(&src)-options.tile_size);
834 int dx = random() % (igt_buf_width(&dst)-options.tile_size);
835 int dy = random() % (igt_buf_height(&dst)-options.tile_size);
837 if (options.use_cpu_maps)
838 set_to_cpu_domain(&src, 1);
840 memset(src.data, 0xff, options.scratch_buf_size);
841 for (j = 0; j < options.tile_size; j++) {
842 ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
843 for (i = 0; i < options.tile_size; i++)
844 ptr[i] = j * options.tile_size + i;
847 render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);
849 if (options.use_cpu_maps)
850 set_to_cpu_domain(&dst, 0);
852 for (j = 0; j < options.tile_size; j++) {
853 ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
854 for (i = 0; i < options.tile_size; i++)
855 if (ptr[i] != j * options.tile_size + i) {
856 igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
863 int main(int argc, char **argv)
866 unsigned *current_permutation, *tmp_permutation;
870 drm_fd = drm_open_any();
871 devid = intel_get_drm_devid(drm_fd);
873 parse_options(argc, argv);
875 /* start our little helper early before too may allocations occur */
876 if (options.use_signal_helper)
877 igt_fork_signal_helper();
881 check_render_copyfunc();
883 tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
884 current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
885 tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
886 igt_assert(tile_permutation);
887 igt_assert(current_permutation);
888 igt_assert(tmp_permutation);
892 for (i = 0; i < options.total_rounds; i++) {
893 igt_info("round %i\n", i);
896 igt_info("everything correct after %i rounds\n", i + 1);
899 target_set = (current_set + 1) & 1;
900 init_set(target_set);
902 for (j = 0; j < num_total_tiles; j++)
903 current_permutation[j] = j;
904 igt_permute_array(current_permutation, num_total_tiles, exchange_uint);
906 copy_tiles(current_permutation);
908 memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
910 /* accumulate the permutations */
911 for (j = 0; j < num_total_tiles; j++)
912 tile_permutation[j] = current_permutation[tmp_permutation[j]];
914 current_set = target_set;
919 igt_info("num failed tiles %u, max incoherent bytes %zd\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));
921 intel_batchbuffer_free(batch);
922 drm_intel_bufmgr_destroy(bufmgr);
926 igt_stop_signal_helper();