/* basic set of prime tests between intel and nouveau */

/* test list:
   1. share buffer from intel -> nouveau
   2. share buffer from nouveau -> intel
   3. share intel -> nouveau, map on both, write intel, read nouveau
   4. share intel -> nouveau, blit intel fill, readback on nouveau
   test 1 + map buffer, read/write, map other size
   do some hw actions on the buffer
   some illegal operations:
       close the prime fd, then try to map

   TODO: add some nouveau rendering tests
*/
#include <sys/ioctl.h>

#include "ioctl_wrappers.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_chipset.h"
static int intel_fd = -1, nouveau_fd = -1;
static drm_intel_bufmgr *bufmgr;
static struct nouveau_device *ndev;
static struct nouveau_client *nclient;
static uint32_t devid;
static struct intel_batchbuffer *batch;
static struct nouveau_object *nchannel, *pcopy;
static struct nouveau_bufctx *nbufctx;
static struct nouveau_pushbuf *npush;

static struct nouveau_bo *query_bo;
static uint32_t query_counter;
static volatile uint32_t *query;
static uint32_t memtype_intel, tile_intel_y, tile_intel_x;
#define SUBC_COPY(x) 6, (x)
#define NV01_SUBCHAN_OBJECT 0

#define NV01_SUBC(subc, mthd) SUBC_##subc((NV01_SUBCHAN_##mthd))
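/* For example, NV01_SUBC(COPY, OBJECT) expands through SUBC_COPY to
 * "6, ((0))", i.e. subchannel 6 with method NV01_SUBCHAN_OBJECT, which is
 * exactly the (subc, mthd) argument pair the BEGIN_* helpers below expect. */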
typedef struct {
	uint32_t w, h;
	uint32_t pitch, lines;
} rect;
static void nv_bo_alloc(struct nouveau_bo **bo, rect *r,
			uint32_t w, uint32_t h, uint32_t tile_mode,
			int handle, uint32_t dom)
{
	uint32_t dx = 1, dy = 1, memtype = 0;

	if ((tile_mode & 0xf) == 0xe)
		/* but the internal layout is different */
		tile_x = 6 + (tile_mode & 0xf);
	if (ndev->chipset < 0xc0) {
		if ((tile_mode & 0xf) == 0xe)
			memtype = memtype_intel;
		tile_y += ((tile_mode & 0xf0) >> 4);
	}

	igt_debug("Tiling requirements: x y %u %u\n", dx, dy);

	/* round pitch and lines up to the tiling requirements */
	r->pitch = w = (w + dx - 1) & ~(dx - 1);
	r->lines = h = (h + dy - 1) & ~(dy - 1);
	if (handle < 0) {
		union nouveau_bo_config cfg;
		cfg.nv50.memtype = memtype;
		cfg.nv50.tile_mode = tile_mode;
		if (dom == NOUVEAU_BO_GART)
			dom |= NOUVEAU_BO_MAP;
		igt_assert(nouveau_bo_new(ndev, dom, 4096, size, &cfg, bo) == 0);
		igt_assert(nouveau_bo_map(*bo, NOUVEAU_BO_RDWR, nclient) == 0);

		igt_debug("new flags %08x memtype %08x tile %08x\n",
			  (*bo)->flags, (*bo)->config.nv50.memtype,
			  (*bo)->config.nv50.tile_mode);
		if (tile_mode == tile_intel_y || tile_mode == tile_intel_x) {
			igt_debug("tile mode was: %02x, now: %02x\n",
				  (*bo)->config.nv50.tile_mode, tile_mode);
			/* Doesn't like intel tiling much.. */
			(*bo)->config.nv50.tile_mode = tile_mode;
		}
	} else {
		igt_assert(nouveau_bo_prime_handle_ref(ndev, handle, bo) == 0);

		igt_assert_f((*bo)->size >= size,
			     "expected bo size to be at least %u, "
			     "but received %"PRIu64"\n", size, (*bo)->size);
		igt_debug("prime flags %08x memtype %08x tile %08x\n",
			  (*bo)->flags, (*bo)->config.nv50.memtype,
			  (*bo)->config.nv50.tile_mode);
		(*bo)->config.nv50.memtype = memtype;
		(*bo)->config.nv50.tile_mode = tile_mode;
	}
	igt_debug("size: %"PRIu64"\n", (*bo)->size);
}
PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
{
	*push->cur++ = data;
}

BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
	PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
}

BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
	PUSH_DATA (push, 0x40000000 | (size << 18) | (subc << 13) | mthd);
}

BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
	PUSH_DATA (push, 0x20000000 | (size << 16) | (subc << 13) | (mthd / 4));
}

BEGIN_NVXX(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
	if (ndev->chipset < 0xc0)
		BEGIN_NV04(push, subc, mthd, size);
	else
		BEGIN_NVC0(push, subc, mthd, size);
}
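/* A worked example of the encodings above, for illustration: with subc = 6,
 * mthd = 0x0200 and size = 7, BEGIN_NV04 emits the header
 * (7 << 18) | (6 << 13) | 0x0200 = 0x001cc200, while the Fermi form
 * BEGIN_NVC0 emits 0x20000000 | (7 << 16) | (6 << 13) | (0x0200 / 4)
 * = 0x2007c080. */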
noop_intel(drm_intel_bo *bo)
{
	OUT_BATCH(MI_BATCH_BUFFER_END);
	OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER,
		  I915_GEM_DOMAIN_RENDER, 0);

	intel_batchbuffer_flush(batch);
}
static void find_and_open_devices(void)
{
	int i;
	char path[80], *unused;
	struct stat buf;
	FILE *fl;
	char vendor_id[8] = {};
	uint32_t venid;

	for (i = 0; i < 9; i++) {
		sprintf(path, "/sys/class/drm/card%d/device/vendor", i);
		if (stat(path, &buf))
			break;

		fl = fopen(path, "r");
		if (!fl)
			break;

		unused = fgets(vendor_id, sizeof(vendor_id)-1, fl);
		fclose(fl);

		venid = strtoul(vendor_id, NULL, 16);
		sprintf(path, "/dev/dri/card%d", i);
		if (venid == 0x8086) {
			intel_fd = open(path, O_RDWR);
			igt_assert(intel_fd >= 0);
		} else if (venid == 0x10de) {
			nouveau_fd = open(path, O_RDWR);
			igt_assert(nouveau_fd >= 0);
		}
	}
}
static void init_nouveau(void)
{
	struct nv04_fifo nv04_data = { .vram = 0xbeef0201,
				       .gart = 0xbeef0202 };
	struct nvc0_fifo nvc0_data = { };
	struct nouveau_fifo *fifo;
	uint32_t class, size;
	void *data;

	igt_assert(nouveau_device_wrap(nouveau_fd, 0, &ndev) == 0);

	igt_assert(nouveau_client_new(ndev, &nclient) == 0);

	igt_skip_on_f(ndev->chipset < 0xa3 || ndev->chipset == 0xaa || ndev->chipset == 0xac,
		      "Your card doesn't support PCOPY\n");

	// TODO: Get a Kepler and add support for it
	igt_skip_on_f(ndev->chipset >= 0xe0,
		      "Unsure how Kepler works!\n");
	igt_assert(nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
				  4096, 4096, NULL, &query_bo) == 0);
	igt_assert(nouveau_bo_map(query_bo, NOUVEAU_BO_RDWR, nclient) == 0);
	query = query_bo->map;
	*query = query_counter;

	if (ndev->chipset < 0xc0) {
		class = 0x85b5;
		data = &nv04_data;
		size = sizeof(nv04_data);
	} else {
		class = ndev->chipset < 0xe0 ? 0x490b5 : 0xa0b5;
		data = &nvc0_data;
		size = sizeof(nvc0_data);
	}

	igt_assert(nouveau_object_new(&ndev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
				      data, size, &nchannel) == 0);

	fifo = nchannel->data;

	igt_assert(nouveau_pushbuf_new(nclient, nchannel, 4, 32 * 1024,
				       true, &npush) == 0);

	igt_assert(nouveau_bufctx_new(nclient, 1, &nbufctx) == 0);

	npush->user_priv = nbufctx;

	/* Hope this is enough init for PCOPY */
	igt_assert(nouveau_object_new(nchannel, class, class & 0xffff, NULL, 0, &pcopy) == 0);
	igt_assert(nouveau_pushbuf_space(npush, 512, 0, 0) == 0);

	if (ndev->chipset < 0xc0) {
		struct nv04_fifo *nv04_fifo = (struct nv04_fifo*)fifo;

		BEGIN_NV04(npush, NV01_SUBC(COPY, OBJECT), 1);
		PUSH_DATA(npush, pcopy->handle);
		BEGIN_NV04(npush, SUBC_COPY(0x0180), 3);
		PUSH_DATA(npush, nv04_fifo->vram);
		PUSH_DATA(npush, nv04_fifo->vram);
		PUSH_DATA(npush, nv04_fifo->vram);
	} else {
		BEGIN_NVC0(npush, NV01_SUBC(COPY, OBJECT), 1);
		PUSH_DATA(npush, pcopy->handle);
	}

	nouveau_pushbuf_kick(npush, npush->channel);
}
/* splat the low byte of val across 4 dwords (a 16-byte block) */
static void fill16(void *ptr, uint32_t val)
{
	uint32_t *p = ptr;
	val = (val) | (val << 8) | (val << 16) | (val << 24);
	p[0] = p[1] = p[2] = p[3] = val;
}
#define TILE_SIZE 4096

/* Tile in Y major format: 4096-byte tiles of 128x32 bytes, each tile filled
 * column-major in 16-byte cells. */
static void swtile_y(uint8_t *out, const uint8_t *in, int w, int h)
{
	uint32_t x, y, dx, dy;
	uint8_t *endptr = out + w * h;

	igt_assert(!(w % 128));
	igt_assert(!(h % 32));

	for (y = 0; y < h; y += 32) {
		for (x = 0; x < w; x += 128, out += TILE_SIZE) {
			for (dx = 0; dx < 8; ++dx) {
				for (dy = 0; dy < 32; ++dy) {
					uint32_t out_ofs = (dx * 32 + dy) * 16;
					uint32_t in_ofs = (y + dy) * w + (x + 16 * dx);
					igt_assert(out_ofs < TILE_SIZE);
					igt_assert(in_ofs < w * h);

					// To apply the Y tiling bit9 swizzle quirk:
					// out_ofs = out_ofs ^ (((out_ofs >> 9) & 1) << 6);
					memcpy(&out[out_ofs], &in[in_ofs], 16);
				}
			}
		}
	}
	igt_assert(out == endptr);
}
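/* A minimal sketch of the same Y-major layout as a per-byte address
 * computation (swizzle_y_offset is a hypothetical helper, not used by the
 * tests): the surface is covered row-major by 4096-byte tiles of 128x32
 * bytes, and each tile is filled column-major in 16-byte cells. swtile_y()
 * above is the memcpy form of exactly this mapping. */
static inline uint32_t swizzle_y_offset(uint32_t x, uint32_t y, uint32_t w)
{
	uint32_t tile = (y / 32) * (w / 128) + (x / 128);	/* which tile */
	uint32_t cell = ((x % 128) / 16) * 32 + (y % 32);	/* 16-byte cell */
	return tile * TILE_SIZE + cell * 16 + (x % 16);
}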
/* Tile in X major format: 4096-byte tiles of 512x8 bytes, filled as eight
 * consecutive 512-byte rows. */
static void swtile_x(uint8_t *out, const uint8_t *in, int w, int h)
{
	uint32_t x, y, dy;
	uint8_t *endptr = out + w * h;

	igt_assert(!(w % 512));
	igt_assert(!(h % 8));

	for (y = 0; y < h; y += 8) {
		for (x = 0; x < w; x += 512, out += TILE_SIZE) {
			for (dy = 0; dy < 8; ++dy) {
				uint32_t out_ofs = 512 * dy;
				uint32_t in_ofs = (y + dy) * w + x;
				igt_assert(out_ofs < TILE_SIZE);
				igt_assert(in_ofs < w * h);
				memcpy(&out[out_ofs], &in[in_ofs], 512);
			}
		}
	}
	igt_assert(out == endptr);
}
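/* The analogous sketch for the X-major layout (swizzle_x_offset is likewise
 * a hypothetical helper): the same 4096-byte tiles, but 512x8 bytes each. */
static inline uint32_t swizzle_x_offset(uint32_t x, uint32_t y, uint32_t w)
{
	uint32_t tile = (y / 8) * (w / 512) + (x / 512);
	return tile * TILE_SIZE + (y % 8) * 512 + (x % 512);
}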
static void perform_copy(struct nouveau_bo *nvbo, const rect *dst,
			 uint32_t dst_x, uint32_t dst_y,
			 struct nouveau_bo *nvbi, const rect *src,
			 uint32_t src_x, uint32_t src_y,
			 uint32_t w, uint32_t h)
{
	struct nouveau_pushbuf_refn refs[] = {
		{ nvbi, (nvbi->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_RD },
		{ nvbo, (nvbo->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_WR },
		{ query_bo, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR }
	};
	uint32_t cpp = 1, exec = 0x00003000; /* QUERY|QUERY_SHORT|FORMAT */
	uint32_t src_off = 0, dst_off = 0;
	struct nouveau_pushbuf *push = npush;
	int ret;

	if (nvbi->config.nv50.tile_mode == tile_intel_y)
		igt_debug("src is y-tiled\n");
	if (nvbo->config.nv50.tile_mode == tile_intel_y)
		igt_debug("dst is y-tiled\n");

	igt_assert(nouveau_pushbuf_space(push, 64, 0, 0) == 0);
	igt_assert(nouveau_pushbuf_refn(push, refs, 3) == 0);

	if (!nvbi->config.nv50.tile_mode) {
		src_off = src_y * src->pitch + src_x;
	}

	if (!nvbo->config.nv50.tile_mode) {
		dst_off = dst_y * dst->pitch + dst_x;
	}

	BEGIN_NVXX(push, SUBC_COPY(0x0200), 7);
	PUSH_DATA (push, nvbi->config.nv50.tile_mode);
	PUSH_DATA (push, src->pitch / cpp);
	PUSH_DATA (push, src->h);
	PUSH_DATA (push, src_x / cpp);
	PUSH_DATA (push, src_y);

	BEGIN_NVXX(push, SUBC_COPY(0x0220), 7);
	PUSH_DATA (push, nvbo->config.nv50.tile_mode);
	PUSH_DATA (push, dst->pitch / cpp);
	PUSH_DATA (push, dst->h);
	PUSH_DATA (push, dst_x / cpp);
	PUSH_DATA (push, dst_y);

	BEGIN_NVXX(push, SUBC_COPY(0x030c), 9);
	PUSH_DATA (push, (nvbi->offset + src_off) >> 32);
	PUSH_DATA (push, (nvbi->offset + src_off));
	PUSH_DATA (push, (nvbo->offset + dst_off) >> 32);
	PUSH_DATA (push, (nvbo->offset + dst_off));
	PUSH_DATA (push, src->pitch);
	PUSH_DATA (push, dst->pitch);
	PUSH_DATA (push, w / cpp);
	PUSH_DATA (push, h);
	PUSH_DATA (push, 0x03333120);

	BEGIN_NVXX(push, SUBC_COPY(0x0338), 3);
	PUSH_DATA (push, (query_bo->offset) >> 32);
	PUSH_DATA (push, (query_bo->offset));
	PUSH_DATA (push, ++query_counter);

	BEGIN_NVXX(push, SUBC_COPY(0x0300), 1);
	PUSH_DATA (push, exec);

	/* kick the copy off; PCOPY writes the incremented counter back to
	 * query_bo when the copy retires, so just poll the mapping */
	ret = nouveau_pushbuf_kick(push, push->channel);
	while (!ret && *query < query_counter) { usleep(1000); }

	igt_assert(ret == 0);
}
static void check1_macro(uint32_t *p, uint32_t w, uint32_t h)
{
	uint32_t i, j, val;

	/* first tile: linearly increasing 16-byte blocks */
	for (i = 0; i < 256; ++i, p += 4) {
		val = (i) | (i << 8) | (i << 16) | (i << 24);
		igt_assert_f(p[0] == val && p[1] == val && p[2] == val && p[3] == val,
			     "Retile check failed in first tile!\n"
			     "%08x %08x %08x %08x instead of %08x\n",
			     p[0], p[1], p[2], p[3], val);
	}

	/* rest of the first tile row: 0x3e */
	val = 0x3e3e3e3e;
	for (i = 0; i < 256 * (w-1); ++i, p += 4) {
		igt_assert_f(p[0] == val && p[1] == val && p[2] == val && p[3] == val,
			     "Retile check failed in second tile!\n"
			     "%08x %08x %08x %08x instead of %08x\n",
			     p[0], p[1], p[2], p[3], val);
	}

	for (j = 1; j < h; ++j) {
		/* first tile of each following row: 0x7e */
		val = 0x7e7e7e7e;
		for (i = 0; i < 256; ++i, p += 4) {
			igt_assert_f(p[0] == val && p[1] == val && p[2] == val && p[3] == val,
				     "Retile check failed in third tile!\n"
				     "%08x %08x %08x %08x instead of %08x\n",
				     p[0], p[1], p[2], p[3], val);
		}

		/* rest of the row: 0xce */
		val = 0xcececece;
		for (i = 0; i < 256 * (w-1); ++i, p += 4) {
			igt_assert_f(p[0] == val && p[1] == val && p[2] == val && p[3] == val,
				     "Retile check failed in fourth tile!\n"
				     "%08x %08x %08x %08x instead of %08x\n",
				     p[0], p[1], p[2], p[3], val);
		}
	}
}
/* test 1, see if we can copy from linear to intel Y format safely */
static void test1_macro(void)
{
	struct nouveau_bo *nvbo = NULL, *nvbi = NULL;
	rect dst, src;
	uint8_t *ptr;
	uint32_t w = 2 * 128, h = 2 * 32, x, y;
	int prime_fd = -1;

	nv_bo_alloc(&nvbi, &src, w, h, 0, -1, NOUVEAU_BO_GART);
	nv_bo_alloc(&nvbo, &dst, w, h, tile_intel_y, -1, NOUVEAU_BO_GART);

	nouveau_bo_set_prime(nvbo, &prime_fd);

	/* Set up something for our tile that should map into the first
	 * y-major tile, assuming my understanding of the documentation is
	 * correct */

	/* First tile should be read out in groups of 16 bytes that
	 * are all set to a linearly increasing value.. */
	ptr = nvbi->map;
	for (x = 0; x < 128; x += 16)
		for (y = 0; y < 32; ++y)
			fill16(&ptr[y * w + x], x * 2 + y);

	for (x = 128; x < w; x += 16)
		for (y = 0; y < 32; ++y)
			fill16(&ptr[y * w + x], 0x3e);

	for (x = 0; x < 128; x += 16)
		for (y = 32; y < h; ++y)
			fill16(&ptr[y * w + x], 0x7e);

	for (x = 128; x < w; x += 16)
		for (y = 32; y < h; ++y)
			fill16(&ptr[y * w + x], 0xce);
	memset(nvbo->map, 0xfc, w * h);

	if (pcopy)
		perform_copy(nvbo, &dst, 0, 0, nvbi, &src, 0, 0, w, h);
	else
		swtile_y(nvbo->map, nvbi->map, w, h);
	check1_macro(nvbo->map, w/128, h/32);

	nouveau_bo_ref(NULL, &nvbo);
	nouveau_bo_ref(NULL, &nvbi);
}
static void dump_line(uint8_t *map)
{
	uint32_t dx, dy;

	igt_debug("Dumping sub-tile:\n");
	for (dy = 0; dy < 32; ++dy) {
		for (dx = 0; dx < 15; ++dx, ++map)
			igt_debug("%02x ", *map);
		igt_debug("%02x\n", *(map++));
	}
}
static void check1_micro(void *map, uint32_t pitch, uint32_t lines,
			 uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
{
	uint32_t x, y;
	uint8_t *m = (uint8_t *)map + dst_y * pitch + dst_x;

	/* check only the relevant subrectangle [0..w) [0..h) */
	for (y = 0; y < h; ++y, m += pitch) {
		for (x = 0; x < w; ++x) {
			uint8_t expected = ((y & 3) << 6) | (x & 0x3f);

			if (expected != m[x])
				dump_line(m);

			igt_assert_f(expected == m[x],
				     "failed check at x=%u y=%u, expected %02x got %02x\n",
				     x, y, expected, m[x]);
		}
	}
}
/* test 1, but check the micro format, which should be unaffected by bit9
 * swizzling */
static void test1_micro(void)
{
	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
	rect intel, nvidia, linear;
	uint32_t tiling = I915_TILING_Y;

	uint32_t src_x = 0, src_y = 0;
	uint32_t dst_x = 0, dst_y = 0;
	uint32_t x, y, w = 256, h = 64;

	drm_intel_bo *test_intel_bo;
	int prime_fd;

	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", w * h, 4096);
	igt_assert(test_intel_bo);
	drm_intel_bo_set_tiling(test_intel_bo, &tiling, w);
	igt_assert(tiling == I915_TILING_Y);
	igt_assert(drm_intel_gem_bo_map_gtt(test_intel_bo) == 0);

	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
	igt_assert(prime_fd >= 0);
	noop_intel(test_intel_bo);

	nv_bo_alloc(&bo_intel, &intel, w, h, tile_intel_y, prime_fd, 0);
	nv_bo_alloc(&bo_nvidia, &nvidia, w, h, 0x10, -1, NOUVEAU_BO_VRAM);
	nv_bo_alloc(&bo_linear, &linear, w, h, 0, -1, NOUVEAU_BO_GART);

	for (y = 0; y < linear.h; ++y) {
		uint8_t *map = bo_linear->map;
		map += y * linear.pitch;
		for (x = 0; x < linear.pitch; ++x) {
			uint8_t pos = x & 0x3f;
			/* low 4 bits: micro tile pos */
			/* 2 bits: x pos in tile (wraps) */
			/* 2 bits: y pos in tile (wraps) */
			pos |= (y & 3) << 6;
			map[x] = pos;
		}
	}

	perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);

	/* Perform the actual sub rectangle copy */
	if (pcopy)
		perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
	else
		swtile_y(test_intel_bo->virtual, bo_linear->map, w, h);

	noop_intel(test_intel_bo);
	check1_micro(test_intel_bo->virtual, intel.pitch, intel.h, dst_x, dst_y, w, h);

	nouveau_bo_ref(NULL, &bo_linear);
	nouveau_bo_ref(NULL, &bo_nvidia);
	nouveau_bo_ref(NULL, &bo_intel);
	drm_intel_bo_unreference(test_intel_bo);
}
/* test 2, see if we can copy from linear to intel X format safely.
 * Seems nvidia lacks a method to do it, so just keep this test
 * as a reference for potential future tests. Software tiling is
 * used instead. */
static void test2(void)
{
	struct nouveau_bo *nvbo = NULL, *nvbi = NULL;
	rect dst, src;
	uint8_t *ptr;
	uint32_t w = 1024, h = 16, x, y;

	nv_bo_alloc(&nvbi, &src, w, h, 0, -1, NOUVEAU_BO_GART);
	nv_bo_alloc(&nvbo, &dst, w, h, tile_intel_x, -1, NOUVEAU_BO_GART);

	/* Set up something for our tile that should map into the first
	 * x-major tile, assuming my understanding of the documentation is
	 * correct */

	/* First tile should be read out in groups of 16 bytes that
	 * are all set to a linearly increasing value.. */
	ptr = nvbi->map;
	for (y = 0; y < 8; ++y)
		for (x = 0; x < 512; x += 16)
			fill16(&ptr[y * w + x], (y * 512 + x)/16);

	for (y = 0; y < 8; ++y)
		for (x = 512; x < w; x += 16)
			fill16(&ptr[y * w + x], 0x3e);

	for (y = 8; y < h; ++y)
		for (x = 0; x < 512; x += 16)
			fill16(&ptr[y * w + x], 0x7e);

	for (y = 8; y < h; ++y)
		for (x = 512; x < w; x += 16)
			fill16(&ptr[y * w + x], 0xce);
	memset(nvbo->map, 0xfc, w * h);

	/* do this in software, there is no X major tiling in PCOPY (yet?) */
	if (0)
		perform_copy(nvbo, &dst, 0, 0, nvbi, &src, 0, 0, w, h);
	else
		swtile_x(nvbo->map, nvbi->map, w, h);
	check1_macro(nvbo->map, w/512, h/8);

	nouveau_bo_ref(NULL, &nvbo);
	nouveau_bo_ref(NULL, &nvbi);
}
static void check3(const uint32_t *p, uint32_t pitch, uint32_t lines,
		   uint32_t sub_x, uint32_t sub_y,
		   uint32_t sub_w, uint32_t sub_h)
{
	uint32_t x, y;

	/* turn the sub rectangle into absolute end coordinates */
	sub_w += sub_x;
	sub_h += sub_y;

	igt_assert_f(p[pitch * lines / 4 - 1] != 0x03030303,
		     "copy failed: Not all lines have been copied back!\n");

	for (y = 0; y < lines; ++y) {
		for (x = 0; x < pitch; x += 4, ++p) {
			uint32_t expected;

			if ((x < sub_x || x >= sub_w) ||
			    (y < sub_y || y >= sub_h))
				expected = 0x80808080;
			else
				expected = 0x04040404;
			igt_assert_f(*p == expected,
				     "%u,%u should be %08x, but is %08x\n",
				     x, y, expected, *p);
		}
	}
}
/* copy from the nvidia bo to the intel bo and copy it back to a linear bo to
 * check whether tiling was successful */
static void test3_base(int tile_src, int tile_dst)
{
	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
	rect intel, nvidia, linear;
	uint32_t cpp = 4;
	uint32_t src_x = 1 * cpp, src_y = 1;
	uint32_t dst_x = 2 * cpp, dst_y = 26;
	uint32_t w = 298 * cpp, h = 298;
	int prime_fd;
	drm_intel_bo *test_intel_bo;

	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", 2048 * cpp * 768, 4096);
	igt_assert(test_intel_bo);

	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
	igt_assert(prime_fd >= 0);

	nv_bo_alloc(&bo_intel, &intel, 2048 * cpp, 768, tile_dst, prime_fd, 0);
	nv_bo_alloc(&bo_nvidia, &nvidia, 300 * cpp, 300, tile_src, -1, NOUVEAU_BO_VRAM);
	nv_bo_alloc(&bo_linear, &linear, 2048 * cpp, 768, 0, -1, NOUVEAU_BO_GART);

	noop_intel(test_intel_bo);
	memset(bo_linear->map, 0x80, bo_linear->size);
	perform_copy(bo_intel, &intel, 0, 0, bo_linear, &linear, 0, 0, linear.pitch, linear.h);
	noop_intel(test_intel_bo);

	memset(bo_linear->map, 0x04, bo_linear->size);
	perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);

	/* Perform the actual sub rectangle copy */
	noop_intel(test_intel_bo);
	perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
	noop_intel(test_intel_bo);

	memset(bo_linear->map, 0x3, bo_linear->size);
	noop_intel(test_intel_bo);
	perform_copy(bo_linear, &linear, 0, 0, bo_intel, &intel, 0, 0, intel.pitch, intel.h);
	noop_intel(test_intel_bo);

	check3(bo_linear->map, linear.pitch, linear.h, dst_x, dst_y, w, h);

	nouveau_bo_ref(NULL, &bo_linear);
	nouveau_bo_ref(NULL, &bo_nvidia);
	nouveau_bo_ref(NULL, &bo_intel);
	drm_intel_bo_unreference(test_intel_bo);
}
static void test3_1(void)
{
	/* nvidia tiling to intel */
	test3_base(0x40, tile_intel_y);
}

static void test3_2(void)
{
	/* intel tiling to nvidia */
	test3_base(tile_intel_y, 0x40);
}

static void test3_3(void)
{
	/* intel tiling to linear */
	test3_base(tile_intel_y, 0);
}

static void test3_4(void)
{
	/* linear tiling to intel */
	test3_base(0, tile_intel_y);
}

static void test3_5(void)
{
	/* linear to linear */
	test3_base(0, 0);
}
/* Acquire when the semaphore value == SEQUENCE */
#define SEMA_ACQUIRE_EQUAL 1

/* Release, and write a 16-byte query structure to the semaphore address:
 * { (uint32)seq, (uint32)0, (uint64)timestamp } */
#define SEMA_WRITE_LONG 2

/* Acquire when the semaphore value >= SEQUENCE */
#define SEMA_ACQUIRE_GEQUAL 4
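/* A sketch of the 16-byte record SEMA_WRITE_LONG produces, per the comment
 * above (the struct name is illustrative, not part of any API): */
struct sema_write_long_rec {
	uint32_t sequence;	/* the SEQUENCE value of the release */
	uint32_t pad;		/* always written as zero */
	uint64_t timestamp;	/* GPU timestamp of the release */
};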
/* Test only new style semaphores, old ones are AWFUL */
static void test_semaphore(void)
{
	drm_intel_bo *test_intel_bo = NULL;
	struct nouveau_bo *sema_bo = NULL;
	int prime_fd;
	uint32_t *sema;
	struct nouveau_pushbuf *push = npush;

	igt_skip_on(ndev->chipset < 0x84);

	/* Should probably be kept in sysmem */
	test_intel_bo = drm_intel_bo_alloc(bufmgr, "semaphore bo", 4096, 4096);
	igt_assert(test_intel_bo);

	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
	igt_assert(prime_fd >= 0);
	igt_assert(nouveau_bo_prime_handle_ref(ndev, prime_fd, &sema_bo) == 0);

	igt_assert(drm_intel_gem_bo_map_gtt(test_intel_bo) == 0);
	sema = test_intel_bo->virtual;
	sema++;
	*sema = 0;

	igt_assert(nouveau_pushbuf_space(push, 64, 0, 0) == 0);
	igt_assert(nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn)
					{ sema_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR }, 1) == 0);

	if (ndev->chipset < 0xc0) {
		struct nv04_fifo *nv04_fifo = nchannel->data;
		/* the kernel binds its own dma object here and overwrites the
		 * old one, so just rebind vram every time we submit */
		BEGIN_NV04(npush, SUBC_COPY(0x0060), 1);
		PUSH_DATA(npush, nv04_fifo->vram);
	}

	BEGIN_NVXX(push, SUBC_COPY(0x0010), 4);
	PUSH_DATA(push, sema_bo->offset >> 32);
	PUSH_DATA(push, sema_bo->offset + 4);
	PUSH_DATA(push, 2); // SEQUENCE
	PUSH_DATA(push, SEMA_WRITE_LONG); // TRIGGER

	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_ACQUIRE_EQUAL);
	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_WRITE_LONG);

	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_ACQUIRE_GEQUAL);
	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_WRITE_LONG);

	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_ACQUIRE_GEQUAL);
	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
	PUSH_DATA(push, SEMA_WRITE_LONG);
	nouveau_pushbuf_kick(push, push->channel);

	igt_assert(*sema == 2);

	igt_assert(*sema == 4);

	igt_assert(*sema == 6);

	igt_assert(*sema == 9);

	nouveau_bo_ref(NULL, &sema_bo);
	drm_intel_bo_unreference(test_intel_bo);
}
	find_and_open_devices();

	igt_require(nouveau_fd != -1);
	igt_require(intel_fd != -1);

	/* set up intel bufmgr */
	bufmgr = drm_intel_bufmgr_gem_init(intel_fd, 4096);
	igt_assert(bufmgr);
	/* Do not enable reuse, we share (almost) all buffers. */
	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	/* set up nouveau bufmgr */
	init_nouveau();

	/* set up an intel batch buffer */
	devid = intel_get_drm_devid(intel_fd);
	batch = intel_batchbuffer_alloc(bufmgr, devid);
#define xtest(x, args...) \

	//xtest(test1_swizzle);
	xtest(test_semaphore);
	nouveau_bo_ref(NULL, &query_bo);
	nouveau_object_del(&pcopy);
	nouveau_bufctx_del(&nbufctx);
	nouveau_pushbuf_del(&npush);
	nouveau_object_del(&nchannel);

	intel_batchbuffer_free(batch);

	nouveau_client_del(&nclient);
	nouveau_device_del(&ndev);
	drm_intel_bufmgr_destroy(bufmgr);