lib/igt_kms: Unify pipe name helpers
[platform/upstream/intel-gpu-tools.git] / tests / gem_stress.c
1 /*
2  * Copyright © 2011 Daniel Vetter
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel.vetter@ffwll.ch>
25  *
26  * Partially based upon gem_tiled_fence_blits.c
27  */
28
29 /** @file gem_stress.c
30  *
31  * This is a general gem coherency test. It's designed to eventually replicate
32  * any possible sequence of access patterns. It works by copying a set of tiles
 * between two sets of backing buffer objects, randomly permuting the assigned
 * position on each copy operation.
35  *
36  * The copy operation are done in tiny portions (to reduce any race windows
37  * for corruptions, hence increasing the chances for observing one) and are
38  * constantly switched between all means to copy stuff (fenced blitter, unfenced
39  * render, mmap, pwrite/read).
40  *
 * After every complete move of a set, the tiling parameters of a buffer are
 * randomly changed to simulate the effects of libdrm caching.
43  *
44  * Buffers are 1mb big to nicely fit into fences on gen2/3. A few are further
45  * split up to test relaxed fencing. Using this to push the average working set
46  * size over the available gtt space forces objects to be mapped as unfenceable
47  * (and as a side-effect tests gtt map/unmap coherency).
48  *
49  * In short: designed for maximum evilness.
50  */
51
52 #include <stdlib.h>
53 #include <sys/ioctl.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <fcntl.h>
57 #include <inttypes.h>
58 #include <errno.h>
59 #include <sys/stat.h>
60 #include <sys/time.h>
61
62 #include <drm.h>
63
64 #include "ioctl_wrappers.h"
65 #include "drmtest.h"
66 #include "intel_bufmgr.h"
67 #include "intel_batchbuffer.h"
68 #include "intel_io.h"
69 #include "intel_chipset.h"
70 #include "igt_aux.h"
71
72 #define CMD_POLY_STIPPLE_OFFSET       0x7906
73
74 #define DUCTAPE 0xdead0001
75 #define TILESZ  0xdead0002
76 #define CHCK_RENDER 0xdead0003
77
78 /** TODO:
79  * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
80  * - render copy (to check fence tracking and cache coherency management by the
81  *   kernel)
82  * - multi-threading: probably just a wrapper script to launch multiple
83  *   instances + an option to accordingly reduce the working set
84  * - gen6 inter-ring coherency (needs render copy, first)
85  * - variable buffer size
86  * - add an option to fork a second process that randomly sends signals to the
87  *   first one (to check consistency of the kernel recovery paths)
88  */
89
/* Global drm/libdrm state shared by all copy paths. */
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int drm_fd;
int devid;
int num_fences;		/* number of hw fences available (gem_available_fences) */

/* Scratch bo that keep_gpu_busy() blits within to generate gpu load. */
drm_intel_bo *busy_bo;

/* All command-line tweakables; set in parse_options(). */
struct option_struct {
    unsigned scratch_buf_size;	/* size of each scratch buffer in bytes */
    unsigned max_dimension;	/* upper bound on buffer width/height */
    unsigned num_buffers;	/* buffers per set; 0 = auto-size from aperture */
    int trace_tile;		/* log every operation on this tile; -1 = off */
    int no_hw;			/* debug mode: do all copies on the cpu */
    int gpu_busy_load;		/* fixed busy-load factor; 0 = auto-cycled in init_set() */
    int use_render;		/* allow the render-copy path */
    int use_blt;		/* allow the blitter path */
    int forced_tiling;		/* force this tiling mode; negative = randomize */
    int use_cpu_maps;		/* map bos through the cpu instead of the gtt */
    int total_rounds;		/* number of rounds to run */
    int fail;			/* abort the test when corruption is detected */
    int tiles_per_buf;		/* logical tiles per scratch buffer */
    int ducttape;		/* wait for rendering before cpu access */
    int tile_size;		/* tile edge length in pixels */
    int check_render_cpyfn;	/* self-test the render copy function */
    int use_signal_helper;	/* keep the signal-storm helper enabled (-S disables) */
};

struct option_struct options;

#define MAX_BUFS                4096
#define SCRATCH_BUF_SIZE        1024*1024
#define BUSY_BUF_SIZE           (256*4096)
#define TILE_BYTES(size)        ((size)*(size)*sizeof(uint32_t))

/* Two buffer sets; data ping-pongs between them every round. */
static struct igt_buf buffers[2][MAX_BUFS];
/* tile i is at logical position tile_permutation[i] */
static unsigned *tile_permutation;
static unsigned num_buffers = 0;
static unsigned current_set = 0;	/* set currently holding the data */
static unsigned target_set = 0;		/* set being copied into */
static unsigned num_total_tiles = 0;

int fence_storm = 0;	/* countdown of fences left to consume in a fence storm */
static int gpu_busy_load = 10;

/* Corruption statistics accumulated by cpucpy2d(). */
struct {
        unsigned num_failed;
        unsigned max_failed_reads;
} stats;
140
141 static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
142 {
143         igt_assert(tile < buf->num_tiles);
144         *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
145         *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
146 }
147
/*
 * Emit a fenced XY_SRC_COPY blit of a w x h rectangle (32bpp) from src_bo
 * to dst_bo into the global batch.  Pitches are in bytes, coordinates in
 * pixels.  The batch is not flushed here; callers decide when to submit.
 */
static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
                     unsigned src_x, unsigned src_y, unsigned w, unsigned h,
                     drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
                     unsigned dst_x, unsigned dst_y)
{
        uint32_t cmd_bits = 0;

        /* Gen4+ takes tiled pitches in dword units and needs the tiling
         * flags set in the command word. */
        if (IS_965(devid) && src_tiling) {
                src_pitch /= 4;
                cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
        }

        if (IS_965(devid) && dst_tiling) {
                dst_pitch /= 4;
                cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
        }

        BLIT_COPY_BATCH_START(devid, cmd_bits);
        OUT_BATCH((3 << 24) | /* 32 bits */
                  (0xcc << 16) | /* copy ROP */
                  dst_pitch);
        OUT_BATCH(dst_y << 16 | dst_x);
        OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
        OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
        BLIT_RELOC_UDW(devid);
        OUT_BATCH(src_y << 16 | src_x);
        OUT_BATCH(src_pitch);
        OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
        BLIT_RELOC_UDW(devid);
        ADVANCE_BATCH();

        /* NOTE(review): dummy XY_SETUP_CLIP_BLT emitted after every copy on
         * gen6/7 — presumably a blitter state workaround; confirm against
         * the programming docs. */
        if (IS_GEN6(devid) || IS_GEN7(devid)) {
                BEGIN_BATCH(3);
                OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
                OUT_BATCH(0);
                OUT_BATCH(0);
                ADVANCE_BATCH();
        }
}
188
/* All this gem thrashing wastes too much cpu time, so give the gpu something
 * to do to increase the chances for races. */
191 static void keep_gpu_busy(void)
192 {
193         int tmp;
194
195         tmp = 1 << gpu_busy_load;
196         igt_assert(tmp <= 1024);
197
198         emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
199                  busy_bo, 0, 4096, 0, 128);
200 }
201
202 static void set_to_cpu_domain(struct igt_buf *buf, int writing)
203 {
204         gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
205                        writing ? I915_GEM_DOMAIN_CPU : 0);
206 }
207
/* Monotonic counter used by next_copyfunc() to deterministically rotate
 * between the available copy implementations. */
static unsigned int copyfunc_seq = 0;
/* The copy implementation selected for the next tile move. */
static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
                        struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                        unsigned logical_tile_no);
212
213 /* stride, x, y in units of uint32_t! */
214 static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
215                      uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
216                      unsigned logical_tile_no)
217 {
218         int i, j;
219         int failed = 0;
220
221         for (i = 0; i < options.tile_size; i++) {
222                 for (j = 0; j < options.tile_size; j++) {
223                         unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
224                         unsigned src_ofs = src_x + j + src_stride * (src_y + i);
225                         unsigned expect = logical_tile_no*options.tile_size*options.tile_size
226                             + i*options.tile_size + j;
227                         uint32_t tmp = src[src_ofs];
228                         if (tmp != expect) {
229                             igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
230                             igt_fail_on(options.trace_tile >= 0 && options.fail);
231                             failed++;
232                         }
233                         /* when not aborting, correct any errors */
234                         dst[dst_ofs] = expect;
235                 }
236         }
237         igt_fail_on(failed && options.fail);
238
239         if (failed > stats.max_failed_reads)
240                 stats.max_failed_reads = failed;
241         if (failed)
242                 stats.num_failed++;
243 }
244
245 static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
246                          struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
247                          unsigned logical_tile_no)
248 {
249         igt_assert(batch->ptr == batch->buffer);
250
251         if (options.ducttape)
252                 drm_intel_bo_wait_rendering(dst->bo);
253
254         if (options.use_cpu_maps) {
255                 set_to_cpu_domain(src, 0);
256                 set_to_cpu_domain(dst, 1);
257         }
258
259         cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
260                  dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
261                  logical_tile_no);
262 }
263
/*
 * Copy one tile via pread/pwrite (drm_intel_bo_{get_,}subdata), bouncing
 * the data through a tile-sized stack buffer.  Only untiled buffers go
 * through the row-by-row subdata path; tiled buffers use the cpu mapping
 * instead, with cpucpy2d() verifying the data along the way.
 */
static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                         struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                         unsigned logical_tile_no)
{
        uint32_t tmp_tile[options.tile_size*options.tile_size];
        int i;

        /* the prw path must start with an empty batch */
        igt_assert(batch->ptr == batch->buffer);

        if (options.ducttape)
                drm_intel_bo_wait_rendering(dst->bo);

        if (src->tiling == I915_TILING_NONE) {
                /* read the tile row by row with pread */
                for (i = 0; i < options.tile_size; i++) {
                        unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
                        drm_intel_bo_get_subdata(src->bo, ofs,
                                                 options.tile_size*sizeof(uint32_t),
                                                 tmp_tile + options.tile_size*i);
                }
        } else {
                if (options.use_cpu_maps)
                        set_to_cpu_domain(src, 0);

                cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
                         tmp_tile, options.tile_size, 0, 0, logical_tile_no);
        }

        if (dst->tiling == I915_TILING_NONE) {
                /* write the tile row by row with pwrite */
                for (i = 0; i < options.tile_size; i++) {
                        unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
                        drm_intel_bo_subdata(dst->bo, ofs,
                                             options.tile_size*sizeof(uint32_t),
                                             tmp_tile + options.tile_size*i);
                }
        } else {
                if (options.use_cpu_maps)
                        set_to_cpu_domain(dst, 1);

                cpucpy2d(tmp_tile, options.tile_size, 0, 0,
                         dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
                         logical_tile_no);
        }
}
307
/*
 * Copy one tile with the blitter, emitting a keep_gpu_busy() load on
 * alternating sides of the copy to stress both orderings.
 *
 * During a fence storm the busy load is skipped and the batch is only
 * flushed once the queued fenced blits have consumed (roughly) all
 * available fences, to maximize fence pressure.
 */
static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                             struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                             unsigned logical_tile_no)
{
        static unsigned keep_gpu_busy_counter = 0;

        /* check both edges of the fence usage */
        if (keep_gpu_busy_counter & 1 && !fence_storm)
                keep_gpu_busy();

        emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
                 options.tile_size, options.tile_size,
                 dst->bo, dst->tiling, dst->stride, dst_x, dst_y);

        if (!(keep_gpu_busy_counter & 1) && !fence_storm)
                keep_gpu_busy();

        keep_gpu_busy_counter++;

        /* each tiled bo referenced by the blit consumes one fence */
        if (src->tiling)
                fence_storm--;
        if (dst->tiling)
                fence_storm--;

        if (fence_storm <= 1) {
                fence_storm = 0;
                intel_batchbuffer_flush(batch);
        }
}
337
/*
 * Copy one tile with the render engine, falling back to the blitter when
 * no render-copy implementation exists for this device.  Like the blitter
 * path, a gpu busy load is emitted on alternating sides of the copy.
 */
static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                            struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                            unsigned logical_tile_no)
{
        static unsigned keep_gpu_busy_counter = 0;
        igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);

        /* check both edges of the fence usage */
        if (keep_gpu_busy_counter & 1)
                keep_gpu_busy();

        if (rendercopy) {
                /*
                 * Flush outstanding blts so that they don't end up on
                 * the render ring when that's not allowed (gen6+).
                 */
                intel_batchbuffer_flush(batch);
                rendercopy(batch, NULL, src, src_x, src_y,
                     options.tile_size, options.tile_size,
                     dst, dst_x, dst_y);
        } else
                blitter_copyfunc(src, src_x, src_y,
                                 dst, dst_x, dst_y,
                                 logical_tile_no);
        if (!(keep_gpu_busy_counter & 1))
                keep_gpu_busy();

        keep_gpu_busy_counter++;
        intel_batchbuffer_flush(batch);
}
368
369 static void next_copyfunc(int tile)
370 {
371         if (fence_storm) {
372                 if (tile == options.trace_tile)
373                         igt_info(" using fence storm\n");
374                 return;
375         }
376
377         if (copyfunc_seq % 61 == 0
378                         && options.forced_tiling != I915_TILING_NONE) {
379                 if (tile == options.trace_tile)
380                         igt_info(" using fence storm\n");
381                 fence_storm = num_fences;
382                 copyfunc = blitter_copyfunc;
383         } else if (copyfunc_seq % 17 == 0) {
384                 if (tile == options.trace_tile)
385                         igt_info(" using cpu\n");
386                 copyfunc = cpu_copyfunc;
387         } else if (copyfunc_seq % 19 == 0) {
388                 if (tile == options.trace_tile)
389                         igt_info(" using prw\n");
390                 copyfunc = prw_copyfunc;
391         } else if (copyfunc_seq % 3 == 0 && options.use_render) {
392                 if (tile == options.trace_tile)
393                         igt_info(" using render\n");
394                 copyfunc = render_copyfunc;
395         } else if (options.use_blt){
396                 if (tile == options.trace_tile)
397                         igt_info(" using blitter\n");
398                 copyfunc = blitter_copyfunc;
399         } else if (options.use_render){
400                 if (tile == options.trace_tile)
401                         igt_info(" using render\n");
402                 copyfunc = render_copyfunc;
403         } else {
404                 copyfunc = cpu_copyfunc;
405         }
406
407         copyfunc_seq++;
408 }
409
410 static void fan_out(void)
411 {
412         uint32_t tmp_tile[options.tile_size*options.tile_size];
413         uint32_t seq = 0;
414         int i, k;
415         unsigned tile, buf_idx, x, y;
416
417         for (i = 0; i < num_total_tiles; i++) {
418                 tile = i;
419                 buf_idx = tile / options.tiles_per_buf;
420                 tile %= options.tiles_per_buf;
421
422                 tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
423
424                 for (k = 0; k < options.tile_size*options.tile_size; k++)
425                         tmp_tile[k] = seq++;
426
427                 if (options.use_cpu_maps)
428                         set_to_cpu_domain(&buffers[current_set][buf_idx], 1);
429
430                 cpucpy2d(tmp_tile, options.tile_size, 0, 0,
431                          buffers[current_set][buf_idx].data,
432                          buffers[current_set][buf_idx].stride / sizeof(uint32_t),
433                          x, y, i);
434         }
435
436         for (i = 0; i < num_total_tiles; i++)
437                 tile_permutation[i] = i;
438 }
439
440 static void fan_in_and_check(void)
441 {
442         uint32_t tmp_tile[options.tile_size*options.tile_size];
443         unsigned tile, buf_idx, x, y;
444         int i;
445         for (i = 0; i < num_total_tiles; i++) {
446                 tile = tile_permutation[i];
447                 buf_idx = tile / options.tiles_per_buf;
448                 tile %= options.tiles_per_buf;
449
450                 tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
451
452                 if (options.use_cpu_maps)
453                         set_to_cpu_domain(&buffers[current_set][buf_idx], 0);
454
455                 cpucpy2d(buffers[current_set][buf_idx].data,
456                          buffers[current_set][buf_idx].stride / sizeof(uint32_t),
457                          x, y,
458                          tmp_tile, options.tile_size, 0, 0,
459                          i);
460         }
461 }
462
/*
 * Clamp buf->stride so that the buffer's implied width and height both
 * fall within [tile_size, max_dimension].
 *
 * The fixups are order-sensitive: width * height is fixed by buf->size,
 * so each adjustment trades one dimension against the other; the later
 * checks take precedence over the earlier ones.
 */
static void sanitize_stride(struct igt_buf *buf)
{

        if (igt_buf_height(buf) > options.max_dimension)
                buf->stride = buf->size / options.max_dimension;

        if (igt_buf_height(buf) < options.tile_size)
                buf->stride = buf->size / options.tile_size;

        if (igt_buf_width(buf) < options.tile_size)
                buf->stride = options.tile_size * sizeof(uint32_t);

        igt_assert(buf->stride <= 8192);
        igt_assert(igt_buf_width(buf) <= options.max_dimension);
        igt_assert(igt_buf_height(buf) <= options.max_dimension);

        igt_assert(igt_buf_width(buf) >= options.tile_size);
        igt_assert(igt_buf_height(buf) >= options.tile_size);

}
483
484 static void init_buffer(struct igt_buf *buf, unsigned size)
485 {
486         buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
487         buf->size = size;
488         igt_assert(buf->bo);
489         buf->tiling = I915_TILING_NONE;
490         buf->stride = 4096;
491
492         sanitize_stride(buf);
493
494         if (options.no_hw)
495                 buf->data = malloc(size);
496         else {
497                 if (options.use_cpu_maps)
498                         drm_intel_bo_map(buf->bo, 1);
499                 else
500                         drm_intel_gem_bo_map_gtt(buf->bo);
501                 buf->data = buf->bo->virtual;
502         }
503
504         buf->num_tiles = options.tiles_per_buf;
505 }
506
507 static void exchange_buf(void *array, unsigned i, unsigned j)
508 {
509         struct igt_buf *buf_arr, tmp;
510         buf_arr = array;
511
512         memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
513         memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
514         memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
515 }
516
517
/*
 * Shuffle the buffer order of one set and re-randomize tiling mode and
 * stride for roughly a quarter of its buffers, simulating the effects of
 * libdrm bo caching (a reused bo can come back with different tiling
 * parameters).
 *
 * The sequence of random() calls is part of the seeded, reproducible
 * behaviour — do not reorder them.
 */
static void init_set(unsigned set)
{
        long int r;
        int i;

        igt_permute_array(buffers[set], num_buffers, exchange_buf);

        /* with no user-fixed busy load, cycle the load factor through
         * 6..10 whenever set 1 is current */
        if (current_set == 1 && options.gpu_busy_load == 0) {
                gpu_busy_load++;
                if (gpu_busy_load > 10)
                        gpu_busy_load = 6;
        }

        for (i = 0; i < num_buffers; i++) {
                r = random();
                /* only re-tile about a quarter of the buffers each time */
                if ((r & 3) != 0)
                    continue;
                r >>= 2;

                if ((r & 3) != 0)
                        buffers[set][i].tiling = I915_TILING_X;
                else
                        buffers[set][i].tiling = I915_TILING_NONE;
                r >>= 2;
                if (options.forced_tiling >= 0)
                        buffers[set][i].tiling = options.forced_tiling;

                if (buffers[set][i].tiling == I915_TILING_NONE) {
                        /* min 64 byte stride */
                        r %= 8;
                        buffers[set][i].stride = 64 * (1 << r);
                } else if (IS_GEN2(devid)) {
                        /* min 128 byte stride */
                        r %= 7;
                        buffers[set][i].stride = 128 * (1 << r);
                } else {
                        /* min 512 byte stride */
                        r %= 5;
                        buffers[set][i].stride = 512 * (1 << r);
                }

                sanitize_stride(&buffers[set][i]);

                gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
                               buffers[set][i].tiling,
                               buffers[set][i].stride);

                if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
                        igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
        }
}
569
/* Swap entries i and j of an unsigned array (igt_permute_array callback). */
static void exchange_uint(void *array, unsigned i, unsigned j)
{
        unsigned *vals = array;
        unsigned tmp = vals[i];

        vals[i] = vals[j];
        vals[j] = tmp;
}
579
/*
 * Move every tile from the current set to the target set according to
 * @permutation, picking a fresh copy implementation per tile.
 *
 * tile_permutation (last round's placement) doubles as a randomized
 * iteration order over the source tiles.
 */
static void copy_tiles(unsigned *permutation)
{
        unsigned src_tile, src_buf_idx, src_x, src_y;
        unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
        struct igt_buf *src_buf, *dst_buf;
        int i, idx;
        for (i = 0; i < num_total_tiles; i++) {
                /* tile_permutation is independent of current_permutation, so
                 * abuse it to randomize the order of the src bos */
                idx  = tile_permutation[i];
                src_buf_idx = idx / options.tiles_per_buf;
                src_tile = idx % options.tiles_per_buf;
                src_buf = &buffers[current_set][src_buf_idx];

                tile2xy(src_buf, src_tile, &src_x, &src_y);

                dst_buf_idx = permutation[idx] / options.tiles_per_buf;
                dst_tile = permutation[idx] % options.tiles_per_buf;
                dst_buf = &buffers[target_set][dst_buf_idx];

                tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);

                /* no trailing newline: next_copyfunc() appends the
                 * " using ..." suffix to this trace line (in no-hw mode
                 * the line is left unterminated) */
                if (options.trace_tile == i)
                        igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);

                if (options.no_hw) {
                        cpucpy2d(src_buf->data,
                                 src_buf->stride / sizeof(uint32_t),
                                 src_x, src_y,
                                 dst_buf->data,
                                 dst_buf->stride / sizeof(uint32_t),
                                 dst_x, dst_y,
                                 i);
                } else {
                        next_copyfunc(i);

                        copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
                                 i);
                }
        }

        intel_batchbuffer_flush(batch);
}
623
624 static void sanitize_tiles_per_buf(void)
625 {
626         if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
627                 options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
628 }
629
630 static int parse_options(int opt, int opt_index)
631 {
632         int tmp;
633
634         switch(opt) {
635                 case 'd':
636                         options.no_hw = 1;
637                         igt_info("no-hw debug mode\n");
638                         break;
639                 case 'S':
640                         options.use_signal_helper = 0;
641                         igt_info("disabling that pesky nuisance who keeps interrupting us\n");
642                         break;
643                 case 's':
644                         tmp = atoi(optarg);
645                         if (tmp < options.tile_size*8192)
646                                 igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
647                         else if (tmp & (tmp - 1)) {
648                                 igt_info("scratch buffer size needs to be a power-of-two\n");
649                         } else {
650                                 igt_info("fixed scratch buffer size to %u\n", tmp);
651                                 options.scratch_buf_size = tmp;
652                                 sanitize_tiles_per_buf();
653                         }
654                         break;
655                 case 'g':
656                         tmp = atoi(optarg);
657                         if (tmp < 0 || tmp > 10)
658                                 igt_info("gpu busy load needs to be bigger than 0 and smaller than 10\n");
659                         else {
660                                 igt_info("gpu busy load factor set to %i\n", tmp);
661                                 gpu_busy_load = options.gpu_busy_load = tmp;
662                         }
663                         break;
664                 case 'c':
665                         options.num_buffers = atoi(optarg);
666                         igt_info("buffer count set to %i\n", options.num_buffers);
667                         break;
668                 case 't':
669                         options.trace_tile = atoi(optarg);
670                         igt_info("tracing tile %i\n", options.trace_tile);
671                         break;
672                 case 'r':
673                         options.use_render = 0;
674                         igt_info("disabling render copy\n");
675                         break;
676                 case 'b':
677                         options.use_blt = 0;
678                         igt_info("disabling blt copy\n");
679                         break;
680                 case 'u':
681                         options.forced_tiling = I915_TILING_NONE;
682                         igt_info("disabling tiling\n");
683                         break;
684                 case 'x':
685                         if (options.use_cpu_maps) {
686                                 igt_info("tiling not possible with cpu maps\n");
687                         } else {
688                                 options.forced_tiling = I915_TILING_X;
689                                 igt_info("using only X-tiling\n");
690                         }
691                         break;
692                 case 'm':
693                         options.use_cpu_maps = 1;
694                         options.forced_tiling = I915_TILING_NONE;
695                         igt_info("disabling tiling\n");
696                         break;
697                 case 'o':
698                         options.total_rounds = atoi(optarg);
699                         igt_info("total rounds %i\n", options.total_rounds);
700                         break;
701                 case 'f':
702                         options.fail = 0;
703                         igt_info("not failing when detecting errors\n");
704                         break;
705                 case 'p':
706                         options.tiles_per_buf = atoi(optarg);
707                         igt_info("tiles per buffer %i\n", options.tiles_per_buf);
708                         break;
709                 case DUCTAPE:
710                         options.ducttape = 0;
711                         igt_info("applying duct-tape\n");
712                         break;
713                 case TILESZ:
714                         options.tile_size = atoi(optarg);
715                         sanitize_tiles_per_buf();
716                         igt_info("til size %i\n", options.tile_size);
717                         break;
718                 case CHCK_RENDER:
719                         options.check_render_cpyfn = 1;
720                         igt_info("checking render copy function\n");
721                         break;
722         }
723
724         /* actually 32767, according to docs, but that kills our nice pot calculations. */
725         options.max_dimension = 16*1024;
726         if (options.use_render) {
727                 if (IS_GEN2(devid) || IS_GEN3(devid))
728                         options.max_dimension = 2048;
729                 else
730                         options.max_dimension = 8192;
731         }
732         igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);
733
734         return 0;
735 }
736
/* Set up bufmgr, batch, the busy bo, and both scratch buffer sets. */
static void init(void)
{
        int i;
        unsigned tmp;

        /* auto-size the working set to roughly a third of the aperture
         * (capped at 256MB), split across the two sets */
        if (options.num_buffers == 0) {
                tmp = gem_aperture_size(drm_fd);
                tmp = tmp > 256*(1024*1024) ? 256*(1024*1024) : tmp;
                num_buffers = 2 * tmp / options.scratch_buf_size / 3;
                num_buffers /= 2;
                igt_info("using %u buffers\n", num_buffers);
        } else
                num_buffers = options.num_buffers;

        bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
        drm_intel_bufmgr_gem_enable_reuse(bufmgr);
        drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
        num_fences = gem_available_fences(drm_fd);
        igt_assert(num_fences > 4);
        batch = intel_batchbuffer_alloc(bufmgr, devid);

        busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
        if (options.forced_tiling >= 0)
                gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);

        for (i = 0; i < num_buffers; i++) {
                init_buffer(&buffers[0][i], options.scratch_buf_size);
                init_buffer(&buffers[1][i], options.scratch_buf_size);

                num_total_tiles += buffers[0][i].num_tiles;
        }
        current_set = 0;

        /* just in case it helps reproducibility */
        srandom(0xdeadbeef);
}
773
/*
 * Optional self-test (--check-render-cpyfn): fill a tile at a random
 * position in a source buffer with a known pattern, render-copy it to a
 * random position in a destination buffer, and compare the result.
 * Mismatches are only logged, not fatal.
 */
static void check_render_copyfunc(void)
{
        struct igt_buf src, dst;
        uint32_t *ptr;
        int i, j, pass;

        if (!options.check_render_cpyfn)
                return;

        init_buffer(&src, options.scratch_buf_size);
        init_buffer(&dst, options.scratch_buf_size);

        for (pass = 0; pass < 16; pass++) {
                /* random positions, leaving room for one full tile */
                int sx = random() % (igt_buf_width(&src)-options.tile_size);
                int sy = random() % (igt_buf_height(&src)-options.tile_size);
                int dx = random() % (igt_buf_width(&dst)-options.tile_size);
                int dy = random() % (igt_buf_height(&dst)-options.tile_size);

                if (options.use_cpu_maps)
                        set_to_cpu_domain(&src, 1);

                /* poison the whole source, then write the expected
                 * pattern into just the tile */
                memset(src.data, 0xff, options.scratch_buf_size);
                for (j = 0; j < options.tile_size; j++) {
                        ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
                        for (i = 0; i < options.tile_size; i++)
                                ptr[i] = j * options.tile_size + i;
                }

                render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);

                if (options.use_cpu_maps)
                        set_to_cpu_domain(&dst, 0);

                for (j = 0; j < options.tile_size; j++) {
                        ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
                        for (i = 0; i < options.tile_size; i++)
                                if (ptr[i] != j * options.tile_size + i) {
                                        igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
                                }
                }
        }
}
816
817
818 int main(int argc, char **argv)
819 {
820         int i, j;
821         unsigned *current_permutation, *tmp_permutation;
822         static struct option long_options[] = {
823                 {"no-hw", 0, 0, 'd'},
824                 {"buf-size", 1, 0, 's'},
825                 {"gpu-busy-load", 1, 0, 'g'},
826                 {"no-signals", 0, 0, 'S'},
827                 {"buffer-count", 1, 0, 'c'},
828                 {"trace-tile", 1, 0, 't'},
829                 {"disable-blt", 0, 0, 'b'},
830                 {"disable-render", 0, 0, 'r'},
831                 {"untiled", 0, 0, 'u'},
832                 {"x-tiled", 0, 0, 'x'},
833                 {"use-cpu-maps", 0, 0, 'm'},
834                 {"rounds", 1, 0, 'o'},
835                 {"no-fail", 0, 0, 'f'},
836                 {"tiles-per-buf", 0, 0, 'p'},
837                 {"remove-duct-tape", 0, 0, DUCTAPE},
838                 {"tile-size", 1, 0, TILESZ},
839                 {"check-render-cpyfn", 0, 0, CHCK_RENDER},
840                 {NULL, 0, 0, 0},
841         };
842
843         options.scratch_buf_size = 256*4096;
844         options.no_hw = 0;
845         options.use_signal_helper = 1;
846         options.gpu_busy_load = 0;
847         options.num_buffers = 0;
848         options.trace_tile = -1;
849         options.use_render = 1;
850         options.use_blt = 1;
851         options.forced_tiling = -1;
852         options.use_cpu_maps = 0;
853         options.total_rounds = 512;
854         options.fail = 1;
855         options.ducttape = 1;
856         options.tile_size = 16;
857         options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
858         options.check_render_cpyfn = 0;
859
860         igt_simple_init_parse_opts(argc, argv,"ds:g:c:t:rbuxmo:fp:",
861                                    long_options, NULL, parse_options);
862
863         drm_fd = drm_open_any();
864         devid = intel_get_drm_devid(drm_fd);
865
866         /* start our little helper early before too may allocations occur */
867         if (options.use_signal_helper)
868                 igt_fork_signal_helper();
869
870         init();
871
872         check_render_copyfunc();
873
874         tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
875         current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
876         tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
877         igt_assert(tile_permutation);
878         igt_assert(current_permutation);
879         igt_assert(tmp_permutation);
880
881         fan_out();
882
883         for (i = 0; i < options.total_rounds; i++) {
884                 igt_info("round %i\n", i);
885                 if (i % 64 == 63) {
886                         fan_in_and_check();
887                         igt_info("everything correct after %i rounds\n", i + 1);
888                 }
889
890                 target_set = (current_set + 1) & 1;
891                 init_set(target_set);
892
893                 for (j = 0; j < num_total_tiles; j++)
894                         current_permutation[j] = j;
895                 igt_permute_array(current_permutation, num_total_tiles, exchange_uint);
896
897                 copy_tiles(current_permutation);
898
899                 memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
900
901                 /* accumulate the permutations */
902                 for (j = 0; j < num_total_tiles; j++)
903                         tile_permutation[j] = current_permutation[tmp_permutation[j]];
904
905                 current_set = target_set;
906         }
907
908         fan_in_and_check();
909
910         igt_info("num failed tiles %u, max incoherent bytes %zd\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));
911
912         intel_batchbuffer_free(batch);
913         drm_intel_bufmgr_destroy(bufmgr);
914
915         close(drm_fd);
916
917         igt_stop_signal_helper();
918
919         return 0;
920 }