kms_rotation_crc: Remove useless comments
[platform/upstream/intel-gpu-tools.git] / tests / gem_stress.c
1 /*
2  * Copyright © 2011 Daniel Vetter
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel.vetter@ffwll.ch>
25  *
26  * Partially based upon gem_tiled_fence_blits.c
27  */
28
29 /** @file gem_stress.c
30  *
31  * This is a general gem coherency test. It's designed to eventually replicate
32  * any possible sequence of access patterns. It works by copying a set of tiles
 * between two sets of backing buffer objects, randomly permuting the assigned
34  * position on each copy operations.
35  *
36  * The copy operation are done in tiny portions (to reduce any race windows
37  * for corruptions, hence increasing the chances for observing one) and are
38  * constantly switched between all means to copy stuff (fenced blitter, unfenced
39  * render, mmap, pwrite/read).
40  *
 * After every complete move of a set, the tiling parameters of the buffers are
42  * changed to simulate the effects of libdrm caching.
43  *
44  * Buffers are 1mb big to nicely fit into fences on gen2/3. A few are further
45  * split up to test relaxed fencing. Using this to push the average working set
46  * size over the available gtt space forces objects to be mapped as unfenceable
47  * (and as a side-effect tests gtt map/unmap coherency).
48  *
49  * In short: designed for maximum evilness.
50  */
51
52 #include <stdlib.h>
53 #include <sys/ioctl.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <fcntl.h>
57 #include <inttypes.h>
58 #include <errno.h>
59 #include <sys/stat.h>
60 #include <sys/time.h>
61 #include <getopt.h>
62
63 #include <drm.h>
64
65 #include "ioctl_wrappers.h"
66 #include "drmtest.h"
67 #include "intel_bufmgr.h"
68 #include "intel_batchbuffer.h"
69 #include "intel_io.h"
70 #include "intel_chipset.h"
71 #include "igt_aux.h"
72
73 #define CMD_POLY_STIPPLE_OFFSET       0x7906
74
75 /** TODO:
76  * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
77  * - render copy (to check fence tracking and cache coherency management by the
78  *   kernel)
79  * - multi-threading: probably just a wrapper script to launch multiple
80  *   instances + an option to accordingly reduce the working set
81  * - gen6 inter-ring coherency (needs render copy, first)
82  * - variable buffer size
83  * - add an option to fork a second process that randomly sends signals to the
84  *   first one (to check consistency of the kernel recovery paths)
85  */
86
/* Global drm/libdrm handles, set up during initialization. */
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int drm_fd;
int devid;
int num_fences;

/* Scratch bo that keep_gpu_busy() blits onto itself to generate gpu load. */
drm_intel_bo *busy_bo;

/* All command line options; filled in by parse_options(). */
struct option_struct {
    unsigned scratch_buf_size;	/* size of each scratch buffer in bytes */
    unsigned max_dimension;	/* maximum buffer width/height, derived in parse_options() */
    unsigned num_buffers;	/* buffers per set; 0 = auto-size from aperture */
    int trace_tile;		/* log all operations on this tile, -1 = off */
    int no_hw;			/* debug mode: do all copies with the cpu only */
    int gpu_busy_load;		/* dummy blit load factor (0..10); 0 = cycle automatically */
    int use_render;		/* allow the render copy path */
    int use_blt;		/* allow the blitter copy path */
    int forced_tiling;		/* force this tiling on all buffers, -1 = randomize */
    int use_cpu_maps;		/* map buffers with cpu maps instead of gtt maps */
    int total_rounds;		/* number of copy rounds to run */
    int fail;			/* fail the test when a mismatch is detected */
    int tiles_per_buf;		/* number of tiles in each scratch buffer */
    int ducttape;		/* wait for rendering before cpu access (see cpu_copyfunc) */
    int tile_size;		/* tile edge length in units of uint32_t */
    int check_render_cpyfn;	/* run the render copyfunc sanity check */
    int use_signal_helper;	/* let the signal helper interrupt the test */
};

struct option_struct options;

#define MAX_BUFS                4096
#define SCRATCH_BUF_SIZE        1024*1024
#define BUSY_BUF_SIZE           (256*4096)
#define TILE_BYTES(size)        ((size)*(size)*sizeof(uint32_t))

/* Two buffer sets; each round copies all tiles from one set into the other. */
static struct igt_buf buffers[2][MAX_BUFS];
/* tile i is at logical position tile_permutation[i] */
static unsigned *tile_permutation;
static unsigned num_buffers = 0;
static unsigned current_set = 0;
static unsigned target_set = 0;
static unsigned num_total_tiles = 0;

/* Remaining fence budget of an ongoing fence storm, see blitter_copyfunc(). */
int fence_storm = 0;
static int gpu_busy_load = 10;

/* Mismatch statistics accumulated by cpucpy2d(). */
struct {
        unsigned num_failed;
        unsigned max_failed_reads;
} stats;
137
138 static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
139 {
140         igt_assert(tile < buf->num_tiles);
141         *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
142         *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
143 }
144
/*
 * Emit an XY_SRC_COPY blit of a w x h pixel rectangle from src_bo to dst_bo
 * into the current batch, using fenced relocations in the RENDER domain.
 *
 * Pitches are passed in bytes; for tiled surfaces on gen4+ the command
 * expects the pitch divided by 4, hence the adjustment below.
 */
static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
                     unsigned src_x, unsigned src_y, unsigned w, unsigned h,
                     drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
                     unsigned dst_x, unsigned dst_y)
{
        uint32_t cmd_bits = 0;

        if (IS_965(devid) && src_tiling) {
                src_pitch /= 4;
                cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
        }

        if (IS_965(devid) && dst_tiling) {
                dst_pitch /= 4;
                cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
        }

        /* XY_SRC_COPY: 32bpp, ROP 0xcc (straight copy) */
        BLIT_COPY_BATCH_START(devid, cmd_bits);
        OUT_BATCH((3 << 24) | /* 32 bits */
                  (0xcc << 16) | /* copy ROP */
                  dst_pitch);
        OUT_BATCH(dst_y << 16 | dst_x);
        OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
        OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
        BLIT_RELOC_UDW(devid);
        OUT_BATCH(src_y << 16 | src_x);
        OUT_BATCH(src_pitch);
        OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
        BLIT_RELOC_UDW(devid);
        ADVANCE_BATCH();

        /* NOTE(review): gen6/7 get a dummy XY_SETUP_CLIP after every copy -
         * presumably to keep the blitter state legal on those gens; confirm
         * the exact reason against the PRM / original commit. */
        if (IS_GEN6(devid) || IS_GEN7(devid)) {
                BEGIN_BATCH(3);
                OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
                OUT_BATCH(0);
                OUT_BATCH(0);
                ADVANCE_BATCH();
        }
}
185
/* All this gem trashing wastes too much cpu time, so give the gpu something to
 * do to increase chances for races. */
static void keep_gpu_busy(void)
{
        int tmp;

        /* blit width scales exponentially with the busy load factor */
        tmp = 1 << gpu_busy_load;
        igt_assert(tmp <= 1024);

        /* copy a tmp x 128 block from the top of busy_bo onto its lower half */
        emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
                 busy_bo, 0, 4096, 0, 128);
}
198
/*
 * Move @buf into the cpu read domain via the set_domain ioctl; with @writing
 * non-zero also make the cpu domain the write domain.
 */
static void set_to_cpu_domain(struct igt_buf *buf, int writing)
{
        gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
                       writing ? I915_GEM_DOMAIN_CPU : 0);
}
204
/* Counts copy operations; next_copyfunc() derives the next copy method from
 * it pseudo-deterministically. */
static unsigned int copyfunc_seq = 0;
/* The currently selected tile copy implementation. */
static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
                        struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                        unsigned logical_tile_no);
209
/* stride, x, y in units of uint32_t! */
/*
 * Copy one options.tile_size^2 tile from src to dst with the cpu while
 * verifying its contents: logical tile @logical_tile_no must hold the
 * sequential pattern written by fan_out().
 *
 * On a mismatch the failure is logged and, unless --no-fail is set, the test
 * is failed.  The copy always writes the expected pattern to dst, which
 * corrects any corruption when the test keeps running.  Failure counts go
 * into the global stats struct.
 */
static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
                     uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
                     unsigned logical_tile_no)
{
        int i, j;
        int failed = 0;

        for (i = 0; i < options.tile_size; i++) {
                for (j = 0; j < options.tile_size; j++) {
                        unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
                        unsigned src_ofs = src_x + j + src_stride * (src_y + i);
                        /* global sequential pattern, see fan_out() */
                        unsigned expect = logical_tile_no*options.tile_size*options.tile_size
                            + i*options.tile_size + j;
                        uint32_t tmp = src[src_ofs];
                        if (tmp != expect) {
                            igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
                            /* fail immediately when tracing a specific tile */
                            igt_fail_on(options.trace_tile >= 0 && options.fail);
                            failed++;
                        }
                        /* when not aborting, correct any errors */
                        dst[dst_ofs] = expect;
                }
        }
        igt_fail_on(failed && options.fail);

        if (failed > stats.max_failed_reads)
                stats.max_failed_reads = failed;
        if (failed)
                stats.num_failed++;
}
241
/*
 * Copy a tile entirely with the cpu through the buffers' mappings.
 *
 * Requires an empty batch (asserted) since nothing is submitted to the gpu.
 * With duct tape enabled, outstanding rendering on the destination is waited
 * for first to paper over coherency races.
 */
static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                         struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                         unsigned logical_tile_no)
{
        igt_assert(batch->ptr == batch->buffer);

        if (options.ducttape)
                drm_intel_bo_wait_rendering(dst->bo);

        if (options.use_cpu_maps) {
                set_to_cpu_domain(src, 0);
                set_to_cpu_domain(dst, 1);
        }

        cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
                 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
                 logical_tile_no);
}
260
/*
 * Copy a tile with pread/pwrite (bo_get_subdata/bo_subdata), staged through a
 * temporary tile on the stack.
 *
 * subdata accesses the bo in its linear layout, so it is only used for
 * untiled buffers; tiled buffers go through the cpu mapping via cpucpy2d()
 * instead.  Requires an empty batch (asserted).
 */
static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                         struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                         unsigned logical_tile_no)
{
        /* VLA staging buffer holding one full tile */
        uint32_t tmp_tile[options.tile_size*options.tile_size];
        int i;

        igt_assert(batch->ptr == batch->buffer);

        if (options.ducttape)
                drm_intel_bo_wait_rendering(dst->bo);

        if (src->tiling == I915_TILING_NONE) {
                /* read the tile row by row with pread */
                for (i = 0; i < options.tile_size; i++) {
                        unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
                        drm_intel_bo_get_subdata(src->bo, ofs,
                                                 options.tile_size*sizeof(uint32_t),
                                                 tmp_tile + options.tile_size*i);
                }
        } else {
                if (options.use_cpu_maps)
                        set_to_cpu_domain(src, 0);

                cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
                         tmp_tile, options.tile_size, 0, 0, logical_tile_no);
        }

        if (dst->tiling == I915_TILING_NONE) {
                /* write the tile row by row with pwrite */
                for (i = 0; i < options.tile_size; i++) {
                        unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
                        drm_intel_bo_subdata(dst->bo, ofs,
                                             options.tile_size*sizeof(uint32_t),
                                             tmp_tile + options.tile_size*i);
                }
        } else {
                if (options.use_cpu_maps)
                        set_to_cpu_domain(dst, 1);

                cpucpy2d(tmp_tile, options.tile_size, 0, 0,
                         dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
                         logical_tile_no);
        }
}
304
/*
 * Copy a tile with the blitter.
 *
 * Dummy gpu load (keep_gpu_busy()) is emitted before or after the copy on
 * alternating invocations to check both edges of the fence usage.  During a
 * fence storm batches are accumulated without flushing until the fence
 * budget is (approximately) used up, packing many fenced relocations into a
 * single batch.
 */
static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                             struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                             unsigned logical_tile_no)
{
        static unsigned keep_gpu_busy_counter = 0;

        /* check both edges of the fence usage */
        if (keep_gpu_busy_counter & 1 && !fence_storm)
                keep_gpu_busy();

        emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
                 options.tile_size, options.tile_size,
                 dst->bo, dst->tiling, dst->stride, dst_x, dst_y);

        if (!(keep_gpu_busy_counter & 1) && !fence_storm)
                keep_gpu_busy();

        keep_gpu_busy_counter++;

        /* every tiled buffer in the blit consumes one fence */
        if (src->tiling)
                fence_storm--;
        if (dst->tiling)
                fence_storm--;

        if (fence_storm <= 1) {
                fence_storm = 0;
                intel_batchbuffer_flush(batch);
        }
}
334
/*
 * Copy a tile with the render engine, falling back to the blitter when no
 * render copy implementation exists for this device.
 *
 * Like blitter_copyfunc(), dummy gpu load is interleaved on alternating
 * invocations to check both edges of the fence usage.
 */
static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
                            struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
                            unsigned logical_tile_no)
{
        static unsigned keep_gpu_busy_counter = 0;
        igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);

        /* check both edges of the fence usage */
        if (keep_gpu_busy_counter & 1)
                keep_gpu_busy();

        if (rendercopy) {
                /*
                 * Flush outstanding blts so that they don't end up on
                 * the render ring when that's not allowed (gen6+).
                 */
                intel_batchbuffer_flush(batch);
                rendercopy(batch, NULL, src, src_x, src_y,
                     options.tile_size, options.tile_size,
                     dst, dst_x, dst_y);
        } else
                blitter_copyfunc(src, src_x, src_y,
                                 dst, dst_x, dst_y,
                                 logical_tile_no);
        if (!(keep_gpu_busy_counter & 1))
                keep_gpu_busy();

        keep_gpu_busy_counter++;
        intel_batchbuffer_flush(batch);
}
365
366 static void next_copyfunc(int tile)
367 {
368         if (fence_storm) {
369                 if (tile == options.trace_tile)
370                         igt_info(" using fence storm\n");
371                 return;
372         }
373
374         if (copyfunc_seq % 61 == 0
375                         && options.forced_tiling != I915_TILING_NONE) {
376                 if (tile == options.trace_tile)
377                         igt_info(" using fence storm\n");
378                 fence_storm = num_fences;
379                 copyfunc = blitter_copyfunc;
380         } else if (copyfunc_seq % 17 == 0) {
381                 if (tile == options.trace_tile)
382                         igt_info(" using cpu\n");
383                 copyfunc = cpu_copyfunc;
384         } else if (copyfunc_seq % 19 == 0) {
385                 if (tile == options.trace_tile)
386                         igt_info(" using prw\n");
387                 copyfunc = prw_copyfunc;
388         } else if (copyfunc_seq % 3 == 0 && options.use_render) {
389                 if (tile == options.trace_tile)
390                         igt_info(" using render\n");
391                 copyfunc = render_copyfunc;
392         } else if (options.use_blt){
393                 if (tile == options.trace_tile)
394                         igt_info(" using blitter\n");
395                 copyfunc = blitter_copyfunc;
396         } else if (options.use_render){
397                 if (tile == options.trace_tile)
398                         igt_info(" using render\n");
399                 copyfunc = render_copyfunc;
400         } else {
401                 copyfunc = cpu_copyfunc;
402         }
403
404         copyfunc_seq++;
405 }
406
/*
 * Write the sequential test pattern into every tile of the current buffer
 * set and reset tile_permutation to the identity mapping.
 */
static void fan_out(void)
{
        uint32_t tmp_tile[options.tile_size*options.tile_size];
        uint32_t seq = 0;
        int i, k;
        unsigned tile, buf_idx, x, y;

        for (i = 0; i < num_total_tiles; i++) {
                /* logical tile i starts out in buffer buf_idx at index tile */
                tile = i;
                buf_idx = tile / options.tiles_per_buf;
                tile %= options.tiles_per_buf;

                tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);

                /* fill the staging tile with the continuing global sequence */
                for (k = 0; k < options.tile_size*options.tile_size; k++)
                        tmp_tile[k] = seq++;

                if (options.use_cpu_maps)
                        set_to_cpu_domain(&buffers[current_set][buf_idx], 1);

                cpucpy2d(tmp_tile, options.tile_size, 0, 0,
                         buffers[current_set][buf_idx].data,
                         buffers[current_set][buf_idx].stride / sizeof(uint32_t),
                         x, y, i);
        }

        for (i = 0; i < num_total_tiles; i++)
                tile_permutation[i] = i;
}
436
/*
 * Read every logical tile back (following the current permutation) into a
 * scratch tile, letting cpucpy2d() verify each tile's expected pattern.
 */
static void fan_in_and_check(void)
{
        uint32_t tmp_tile[options.tile_size*options.tile_size];
        unsigned tile, buf_idx, x, y;
        int i;
        for (i = 0; i < num_total_tiles; i++) {
                /* current location of logical tile i */
                tile = tile_permutation[i];
                buf_idx = tile / options.tiles_per_buf;
                tile %= options.tiles_per_buf;

                tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);

                if (options.use_cpu_maps)
                        set_to_cpu_domain(&buffers[current_set][buf_idx], 0);

                cpucpy2d(buffers[current_set][buf_idx].data,
                         buffers[current_set][buf_idx].stride / sizeof(uint32_t),
                         x, y,
                         tmp_tile, options.tile_size, 0, 0,
                         i);
        }
}
459
/*
 * Clamp buf->stride so the buffer dimensions stay in range: height no larger
 * than max_dimension, and both width and height at least one tile.  The
 * fixups are applied in order, so a later clamp overrides an earlier one;
 * the asserts then verify all constraints hold simultaneously.
 */
static void sanitize_stride(struct igt_buf *buf)
{

        if (igt_buf_height(buf) > options.max_dimension)
                buf->stride = buf->size / options.max_dimension;

        if (igt_buf_height(buf) < options.tile_size)
                buf->stride = buf->size / options.tile_size;

        if (igt_buf_width(buf) < options.tile_size)
                buf->stride = options.tile_size * sizeof(uint32_t);

        /* NOTE(review): 8192 looks like the maximum stride the copy paths
         * support - confirm against the blitter limits. */
        igt_assert(buf->stride <= 8192);
        igt_assert(igt_buf_width(buf) <= options.max_dimension);
        igt_assert(igt_buf_height(buf) <= options.max_dimension);

        igt_assert(igt_buf_width(buf) >= options.tile_size);
        igt_assert(igt_buf_height(buf) >= options.tile_size);

}
480
/*
 * Allocate and map one scratch buffer: initially untiled with a 4096 byte
 * stride, holding options.tiles_per_buf tiles.  With --no-hw the backing
 * storage is plain malloc memory; otherwise a cpu or gtt mmap depending on
 * --use-cpu-maps.
 */
static void init_buffer(struct igt_buf *buf, unsigned size)
{
        buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
        buf->size = size;
        igt_assert(buf->bo);
        buf->tiling = I915_TILING_NONE;
        buf->stride = 4096;

        sanitize_stride(buf);

        if (options.no_hw)
                buf->data = malloc(size);
        else {
                if (options.use_cpu_maps)
                        drm_intel_bo_map(buf->bo, 1);
                else
                        drm_intel_gem_bo_map_gtt(buf->bo);
                buf->data = buf->bo->virtual;
        }

        buf->num_tiles = options.tiles_per_buf;
}
503
504 static void exchange_buf(void *array, unsigned i, unsigned j)
505 {
506         struct igt_buf *buf_arr, tmp;
507         buf_arr = array;
508
509         memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
510         memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
511         memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
512 }
513
514
/*
 * Shuffle a buffer set and re-randomize tiling parameters on roughly a
 * quarter of its buffers, simulating the effects of libdrm buffer reuse.
 * Also cycles the gpu busy load factor when it wasn't fixed on the command
 * line.
 */
static void init_set(unsigned set)
{
        long int r;
        int i;

        igt_permute_array(buffers[set], num_buffers, exchange_buf);

        /* cycle the busy load through 6..10 unless set explicitly */
        if (current_set == 1 && options.gpu_busy_load == 0) {
                gpu_busy_load++;
                if (gpu_busy_load > 10)
                        gpu_busy_load = 6;
        }

        for (i = 0; i < num_buffers; i++) {
                r = random();
                /* only touch ~every fourth buffer */
                if ((r & 3) != 0)
                    continue;
                r >>= 2;

                /* next two bits: 3/4 chance of X tiling */
                if ((r & 3) != 0)
                        buffers[set][i].tiling = I915_TILING_X;
                else
                        buffers[set][i].tiling = I915_TILING_NONE;
                r >>= 2;
                if (options.forced_tiling >= 0)
                        buffers[set][i].tiling = options.forced_tiling;

                /* random power-of-two stride, respecting the minimum for
                 * the chosen tiling / generation */
                if (buffers[set][i].tiling == I915_TILING_NONE) {
                        /* min 64 byte stride */
                        r %= 8;
                        buffers[set][i].stride = 64 * (1 << r);
                } else if (IS_GEN2(devid)) {
                        /* min 128 byte stride */
                        r %= 7;
                        buffers[set][i].stride = 128 * (1 << r);
                } else {
                        /* min 512 byte stride */
                        r %= 5;
                        buffers[set][i].stride = 512 * (1 << r);
                }

                sanitize_stride(&buffers[set][i]);

                gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
                               buffers[set][i].tiling,
                               buffers[set][i].stride);

                if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
                        igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
        }
}
566
/* igt_permute_array() callback: swap two entries of an unsigned array. */
static void exchange_uint(void *array, unsigned i, unsigned j)
{
	unsigned *vals = array;
	unsigned tmp = vals[i];

	vals[i] = vals[j];
	vals[j] = tmp;
}
576
/*
 * Copy every logical tile from the current set to its new position in the
 * target set as given by @permutation, choosing a fresh copy method per tile
 * via next_copyfunc() (or doing plain cpu copies with --no-hw).
 */
static void copy_tiles(unsigned *permutation)
{
        unsigned src_tile, src_buf_idx, src_x, src_y;
        unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
        struct igt_buf *src_buf, *dst_buf;
        int i, idx;
        for (i = 0; i < num_total_tiles; i++) {
                /* tile_permutation is independent of current_permutation, so
                 * abuse it to randomize the order of the src bos */
                idx  = tile_permutation[i];
                src_buf_idx = idx / options.tiles_per_buf;
                src_tile = idx % options.tiles_per_buf;
                src_buf = &buffers[current_set][src_buf_idx];

                tile2xy(src_buf, src_tile, &src_x, &src_y);

                /* new location of the tile in the target set */
                dst_buf_idx = permutation[idx] / options.tiles_per_buf;
                dst_tile = permutation[idx] % options.tiles_per_buf;
                dst_buf = &buffers[target_set][dst_buf_idx];

                tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);

                if (options.trace_tile == i)
                        igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);

                if (options.no_hw) {
                        cpucpy2d(src_buf->data,
                                 src_buf->stride / sizeof(uint32_t),
                                 src_x, src_y,
                                 dst_buf->data,
                                 dst_buf->stride / sizeof(uint32_t),
                                 dst_x, dst_y,
                                 i);
                } else {
                        next_copyfunc(i);

                        copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
                                 i);
                }
        }

        intel_batchbuffer_flush(batch);
}
620
621 static void sanitize_tiles_per_buf(void)
622 {
623         if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
624                 options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
625 }
626
627 static void parse_options(int argc, char **argv)
628 {
629         int c, tmp;
630         int option_index = 0;
631         static struct option long_options[] = {
632                 {"no-hw", 0, 0, 'd'},
633                 {"buf-size", 1, 0, 's'},
634                 {"gpu-busy-load", 1, 0, 'g'},
635                 {"no-signals", 0, 0, 'S'},
636                 {"buffer-count", 1, 0, 'c'},
637                 {"trace-tile", 1, 0, 't'},
638                 {"disable-blt", 0, 0, 'b'},
639                 {"disable-render", 0, 0, 'r'},
640                 {"untiled", 0, 0, 'u'},
641                 {"x-tiled", 0, 0, 'x'},
642                 {"use-cpu-maps", 0, 0, 'm'},
643                 {"rounds", 1, 0, 'o'},
644                 {"no-fail", 0, 0, 'f'},
645                 {"tiles-per-buf", 0, 0, 'p'},
646 #define DUCTAPE 0xdead0001
647                 {"remove-duct-tape", 0, 0, DUCTAPE},
648 #define TILESZ  0xdead0002
649                 {"tile-size", 1, 0, TILESZ},
650 #define CHCK_RENDER 0xdead0003
651                 {"check-render-cpyfn", 0, 0, CHCK_RENDER},
652                 {NULL, 0, 0, 0},
653         };
654
655         options.scratch_buf_size = 256*4096;
656         options.no_hw = 0;
657         options.use_signal_helper = 1;
658         options.gpu_busy_load = 0;
659         options.num_buffers = 0;
660         options.trace_tile = -1;
661         options.use_render = 1;
662         options.use_blt = 1;
663         options.forced_tiling = -1;
664         options.use_cpu_maps = 0;
665         options.total_rounds = 512;
666         options.fail = 1;
667         options.ducttape = 1;
668         options.tile_size = 16;
669         options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
670         options.check_render_cpyfn = 0;
671
672         while((c = getopt_long(argc, argv, "ds:g:c:t:rbuxmo:fp:",
673                                long_options, &option_index)) != -1) {
674                 switch(c) {
675                 case 'd':
676                         options.no_hw = 1;
677                         igt_info("no-hw debug mode\n");
678                         break;
679                 case 'S':
680                         options.use_signal_helper = 0;
681                         igt_info("disabling that pesky nuisance who keeps interrupting us\n");
682                         break;
683                 case 's':
684                         tmp = atoi(optarg);
685                         if (tmp < options.tile_size*8192)
686                                 igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
687                         else if (tmp & (tmp - 1)) {
688                                 igt_info("scratch buffer size needs to be a power-of-two\n");
689                         } else {
690                                 igt_info("fixed scratch buffer size to %u\n", tmp);
691                                 options.scratch_buf_size = tmp;
692                                 sanitize_tiles_per_buf();
693                         }
694                         break;
695                 case 'g':
696                         tmp = atoi(optarg);
697                         if (tmp < 0 || tmp > 10)
698                                 igt_info("gpu busy load needs to be bigger than 0 and smaller than 10\n");
699                         else {
700                                 igt_info("gpu busy load factor set to %i\n", tmp);
701                                 gpu_busy_load = options.gpu_busy_load = tmp;
702                         }
703                         break;
704                 case 'c':
705                         options.num_buffers = atoi(optarg);
706                         igt_info("buffer count set to %i\n", options.num_buffers);
707                         break;
708                 case 't':
709                         options.trace_tile = atoi(optarg);
710                         igt_info("tracing tile %i\n", options.trace_tile);
711                         break;
712                 case 'r':
713                         options.use_render = 0;
714                         igt_info("disabling render copy\n");
715                         break;
716                 case 'b':
717                         options.use_blt = 0;
718                         igt_info("disabling blt copy\n");
719                         break;
720                 case 'u':
721                         options.forced_tiling = I915_TILING_NONE;
722                         igt_info("disabling tiling\n");
723                         break;
724                 case 'x':
725                         if (options.use_cpu_maps) {
726                                 igt_info("tiling not possible with cpu maps\n");
727                         } else {
728                                 options.forced_tiling = I915_TILING_X;
729                                 igt_info("using only X-tiling\n");
730                         }
731                         break;
732                 case 'm':
733                         options.use_cpu_maps = 1;
734                         options.forced_tiling = I915_TILING_NONE;
735                         igt_info("disabling tiling\n");
736                         break;
737                 case 'o':
738                         options.total_rounds = atoi(optarg);
739                         igt_info("total rounds %i\n", options.total_rounds);
740                         break;
741                 case 'f':
742                         options.fail = 0;
743                         igt_info("not failing when detecting errors\n");
744                         break;
745                 case 'p':
746                         options.tiles_per_buf = atoi(optarg);
747                         igt_info("tiles per buffer %i\n", options.tiles_per_buf);
748                         break;
749                 case DUCTAPE:
750                         options.ducttape = 0;
751                         igt_info("applying duct-tape\n");
752                         break;
753                 case TILESZ:
754                         options.tile_size = atoi(optarg);
755                         sanitize_tiles_per_buf();
756                         igt_info("til size %i\n", options.tile_size);
757                         break;
758                 case CHCK_RENDER:
759                         options.check_render_cpyfn = 1;
760                         igt_info("checking render copy function\n");
761                         break;
762                 default:
763                         igt_info("unkown command options\n");
764                         break;
765                 }
766         }
767
768         if (optind < argc)
769                 igt_info("unkown command options\n");
770
771         /* actually 32767, according to docs, but that kills our nice pot calculations. */
772         options.max_dimension = 16*1024;
773         if (options.use_render) {
774                 if (IS_GEN2(devid) || IS_GEN3(devid))
775                         options.max_dimension = 2048;
776                 else
777                         options.max_dimension = 8192;
778         }
779         igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);
780 }
781
782 static void init(void)
783 {
784         int i;
785         unsigned tmp;
786
787         if (options.num_buffers == 0) {
788                 tmp = gem_aperture_size(drm_fd);
789                 tmp = tmp > 256*(1024*1024) ? 256*(1024*1024) : tmp;
790                 num_buffers = 2 * tmp / options.scratch_buf_size / 3;
791                 num_buffers /= 2;
792                 igt_info("using %u buffers\n", num_buffers);
793         } else
794                 num_buffers = options.num_buffers;
795
796         bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
797         drm_intel_bufmgr_gem_enable_reuse(bufmgr);
798         drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
799         num_fences = gem_available_fences(drm_fd);
800         igt_assert(num_fences > 4);
801         batch = intel_batchbuffer_alloc(bufmgr, devid);
802
803         busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
804         if (options.forced_tiling >= 0)
805                 gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);
806
807         for (i = 0; i < num_buffers; i++) {
808                 init_buffer(&buffers[0][i], options.scratch_buf_size);
809                 init_buffer(&buffers[1][i], options.scratch_buf_size);
810
811                 num_total_tiles += buffers[0][i].num_tiles;
812         }
813         current_set = 0;
814
815         /* just in case it helps reproducability */
816         srandom(0xdeadbeef);
817 }
818
819 static void check_render_copyfunc(void)
820 {
821         struct igt_buf src, dst;
822         uint32_t *ptr;
823         int i, j, pass;
824
825         if (!options.check_render_cpyfn)
826                 return;
827
828         init_buffer(&src, options.scratch_buf_size);
829         init_buffer(&dst, options.scratch_buf_size);
830
831         for (pass = 0; pass < 16; pass++) {
832                 int sx = random() % (igt_buf_width(&src)-options.tile_size);
833                 int sy = random() % (igt_buf_height(&src)-options.tile_size);
834                 int dx = random() % (igt_buf_width(&dst)-options.tile_size);
835                 int dy = random() % (igt_buf_height(&dst)-options.tile_size);
836
837                 if (options.use_cpu_maps)
838                         set_to_cpu_domain(&src, 1);
839
840                 memset(src.data, 0xff, options.scratch_buf_size);
841                 for (j = 0; j < options.tile_size; j++) {
842                         ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
843                         for (i = 0; i < options.tile_size; i++)
844                                 ptr[i] = j * options.tile_size + i;
845                 }
846
847                 render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);
848
849                 if (options.use_cpu_maps)
850                         set_to_cpu_domain(&dst, 0);
851
852                 for (j = 0; j < options.tile_size; j++) {
853                         ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
854                         for (i = 0; i < options.tile_size; i++)
855                                 if (ptr[i] != j * options.tile_size + i) {
856                                         igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
857                                 }
858                 }
859         }
860 }
861
862
863 int main(int argc, char **argv)
864 {
865         int i, j;
866         unsigned *current_permutation, *tmp_permutation;
867
868         igt_simple_init();
869
870         drm_fd = drm_open_any();
871         devid = intel_get_drm_devid(drm_fd);
872
873         parse_options(argc, argv);
874
875         /* start our little helper early before too may allocations occur */
876         if (options.use_signal_helper)
877                 igt_fork_signal_helper();
878
879         init();
880
881         check_render_copyfunc();
882
883         tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
884         current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
885         tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
886         igt_assert(tile_permutation);
887         igt_assert(current_permutation);
888         igt_assert(tmp_permutation);
889
890         fan_out();
891
892         for (i = 0; i < options.total_rounds; i++) {
893                 igt_info("round %i\n", i);
894                 if (i % 64 == 63) {
895                         fan_in_and_check();
896                         igt_info("everything correct after %i rounds\n", i + 1);
897                 }
898
899                 target_set = (current_set + 1) & 1;
900                 init_set(target_set);
901
902                 for (j = 0; j < num_total_tiles; j++)
903                         current_permutation[j] = j;
904                 igt_permute_array(current_permutation, num_total_tiles, exchange_uint);
905
906                 copy_tiles(current_permutation);
907
908                 memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
909
910                 /* accumulate the permutations */
911                 for (j = 0; j < num_total_tiles; j++)
912                         tile_permutation[j] = current_permutation[tmp_permutation[j]];
913
914                 current_set = target_set;
915         }
916
917         fan_in_and_check();
918
919         igt_info("num failed tiles %u, max incoherent bytes %zd\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));
920
921         intel_batchbuffer_free(batch);
922         drm_intel_bufmgr_destroy(bufmgr);
923
924         close(drm_fd);
925
926         igt_stop_signal_helper();
927
928         return 0;
929 }