tests/gem_fence_upload.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Chris Wilson <chris@chris-wilson.co.uk>
  25  *
  26  */
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <unistd.h>
  33 #include <stdlib.h>
  34 #include <stdio.h>
  35 #include <string.h>
  36 #include <fcntl.h>
  37 #include <inttypes.h>
  38 #include <errno.h>
  39 #include <sys/time.h>
  40 #include <pthread.h>
  41 #include "drm.h"
  42 #include "i915_drm.h"
  43 #include "drmtest.h"
  44 #include "ioctl_wrappers.h"
  45
  46 #define OBJECT_SIZE (1024*1024) /* restricted to 1MiB alignment on i915 fences */
  47
  48 static double elapsed(const struct timeval *start,
  49                       const struct timeval *end)
  50 {
  51         return (end->tv_sec - start->tv_sec) + 1e-6*(end->tv_usec - start->tv_usec);
  52 }
  53
  54 static void performance(void)
  55 {
  56         int n, loop, count;
  57         int fd, num_fences;
  58         double linear[2], tiled[2];
  59
  60         fd = drm_open_any();
  61
  62         num_fences = gem_available_fences(fd);
  63         igt_require(num_fences > 0);
  64
  65         for (count = 2; count < 4*num_fences; count *= 2) {
  66                 struct timeval start, end;
  67                 uint32_t handle[count];
  68                 void *ptr[count];
  69
  70                 for (n = 0; n < count; n++) {
  71                         handle[n] = gem_create(fd, OBJECT_SIZE);
  72                         ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
  73                         igt_assert(ptr[n]);
  74                 }
  75
  76                 gettimeofday(&start, NULL);
  77                 for (loop = 0; loop < 1024; loop++) {
  78                         for (n = 0; n < count; n++)
  79                                 memset(ptr[n], 0, OBJECT_SIZE);
  80                 }
  81                 gettimeofday(&end, NULL);
  82
  83                 linear[count != 2] = count * loop / elapsed(&start, &end);
  84                 igt_info("Upload rate for %d linear surfaces:   %7.3fMiB/s\n", count, linear[count != 2]);
  85
  86                 for (n = 0; n < count; n++)
  87                         gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
  88
  89                 gettimeofday(&start, NULL);
  90                 for (loop = 0; loop < 1024; loop++) {
  91                         for (n = 0; n < count; n++)
  92                                 memset(ptr[n], 0, OBJECT_SIZE);
  93                 }
  94                 gettimeofday(&end, NULL);
  95
  96                 tiled[count != 2] = count * loop / elapsed(&start, &end);
  97                 igt_info("Upload rate for %d tiled surfaces:    %7.3fMiB/s\n", count, tiled[count != 2]);
  98
  99                 for (n = 0; n < count; n++) {
 100                         munmap(ptr[n], OBJECT_SIZE);
 101                         gem_close(fd, handle[n]);
 102                 }
 103
 104         }
 105
 106         errno = 0;
 107         igt_assert(linear[1] > 0.75 * linear[0]);
 108         igt_assert(tiled[1] > 0.75 * tiled[0]);
 109 }
 110
 111 struct thread_performance {
 112         pthread_t thread;
 113         int id, count, direction, loops;
 114         void **ptr;
 115 };
 116
 117 static void *read_thread_performance(void *closure)
 118 {
 119         struct thread_performance *t = closure;
 120         uint32_t x = 0;
 121         int n, m;
 122
 123         for (n = 0; n < t->loops; n++) {
 124                 uint32_t *src = t->ptr[rand() % t->count];
 125                 src += (rand() % 256) * 4096 / 4;
 126                 for (m = 0; m < 4096/4; m++)
 127                         x += src[m];
 128         }
 129
 130         return (void *)(uintptr_t)x;
 131 }
 132
 133 static void *write_thread_performance(void *closure)
 134 {
 135         struct thread_performance *t = closure;
 136         int n;
 137
 138         for (n = 0; n < t->loops; n++) {
 139                 uint32_t *dst = t->ptr[rand() % t->count];
 140                 dst += (rand() % 256) * 4096 / 4;
 141                 memset(dst, 0, 4096);
 142         }
 143
 144         return NULL;
 145 }
 146
 147 #define READ (1<<0)
 148 #define WRITE (1<<1)
 149 static const char *direction_string(unsigned mask)
 150 {
 151         switch (mask) {
 152         case READ: return "Download";
 153         case WRITE: return "Upload";
 154         case READ | WRITE: return "Combined";
 155         default: return "Unknown";
 156         }
 157 }
 158 static void thread_performance(unsigned mask)
 159 {
 160         const int loops = 4096;
 161         int n, count;
 162         int fd, num_fences;
 163         double linear[2], tiled[2];
 164
 165         fd = drm_open_any();
 166
 167         num_fences = gem_available_fences(fd);
 168         igt_require(num_fences > 0);
 169
 170         for (count = 2; count < 4*num_fences; count *= 2) {
 171                 const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0);
 172                 struct timeval start, end;
 173                 struct thread_performance readers[count];
 174                 struct thread_performance writers[count];
 175                 uint32_t handle[count];
 176                 void *ptr[count];
 177
 178                 for (n = 0; n < count; n++) {
 179                         handle[n] = gem_create(fd, OBJECT_SIZE);
 180                         ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
 181                         igt_assert(ptr[n]);
 182
 183                         if (mask & READ) {
 184                                 readers[n].id = n;
 185                                 readers[n].direction = READ;
 186                                 readers[n].ptr = ptr;
 187                                 readers[n].count = count;
 188                                 readers[n].loops = loops;
 189                         }
 190
 191                         if (mask & WRITE) {
 192                                 writers[n].id = count - n - 1;
 193                                 writers[n].direction = WRITE;
 194                                 writers[n].ptr = ptr;
 195                                 writers[n].count = count;
 196                                 writers[n].loops = loops;
 197                         }
 198                 }
 199
 200                 gettimeofday(&start, NULL);
 201                 for (n = 0; n < count; n++) {
 202                         if (mask & READ)
 203                                 pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
 204                         if (mask & WRITE)
 205                                 pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
 206                 }
 207                 for (n = 0; n < count; n++) {
 208                         if (mask & READ)
 209                                 pthread_join(readers[n].thread, NULL);
 210                         if (mask & WRITE)
 211                                 pthread_join(writers[n].thread, NULL);
 212                 }
 213                 gettimeofday(&end, NULL);
 214
 215                 linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
 216                 igt_info("%s rate for %d linear surfaces, %d threads:   %7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]);
 217
 218                 for (n = 0; n < count; n++)
 219                         gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
 220
 221                 gettimeofday(&start, NULL);
 222                 for (n = 0; n < count; n++) {
 223                         if (mask & READ)
 224                                 pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
 225                         if (mask & WRITE)
 226                                 pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
 227                 }
 228                 for (n = 0; n < count; n++) {
 229                         if (mask & READ)
 230                                 pthread_join(readers[n].thread, NULL);
 231                         if (mask & WRITE)
 232                                 pthread_join(writers[n].thread, NULL);
 233                 }
 234                 gettimeofday(&end, NULL);
 235
 236                 tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
 237                 igt_info("%s rate for %d tiled surfaces, %d threads:    %7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]);
 238
 239                 for (n = 0; n < count; n++) {
 240                         munmap(ptr[n], OBJECT_SIZE);
 241                         gem_close(fd, handle[n]);
 242                 }
 243         }
 244
 245         errno = 0;
 246         igt_assert(linear[1] > 0.75 * linear[0]);
 247         igt_assert(tiled[1] > 0.75 * tiled[0]);
 248 }
 249
 250 struct thread_contention {
 251         pthread_t thread;
 252         uint32_t handle;
 253         int loops, fd;
 254 };
 255 static void *no_contention(void *closure)
 256 {
 257         struct thread_contention *t = closure;
 258         int n;
 259
 260         for (n = 0; n < t->loops; n++) {
 261                 uint32_t *ptr = gem_mmap(t->fd, t->handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
 262                 igt_assert(ptr);
 263                 memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
 264                 munmap(ptr, OBJECT_SIZE);
 265         }
 266
 267         return NULL;
 268 }
 269
 270 static void thread_contention(void)
 271 {
 272         const int loops = 4096;
 273         int n, count;
 274         int fd, num_fences;
 275         double linear[2], tiled[2];
 276
 277         fd = drm_open_any();
 278
 279         num_fences = gem_available_fences(fd);
 280         igt_require(num_fences > 0);
 281
 282         for (count = 1; count < 4*num_fences; count *= 2) {
 283                 struct timeval start, end;
 284                 struct thread_contention threads[count];
 285
 286                 for (n = 0; n < count; n++) {
 287                         threads[n].handle = gem_create(fd, OBJECT_SIZE);
 288                         threads[n].loops = loops;
 289                         threads[n].fd = fd;
 290                 }
 291
 292                 gettimeofday(&start, NULL);
 293                 for (n = 0; n < count; n++)
 294                         pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
 295                 for (n = 0; n < count; n++)
 296                         pthread_join(threads[n].thread, NULL);
 297                 gettimeofday(&end, NULL);
 298
 299                 linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
 300                 igt_info("Contended upload rate for %d linear threads:  %7.3fMiB/s\n", count, linear[count != 2]);
 301
 302                 for (n = 0; n < count; n++)
 303                         gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
 304
 305                 gettimeofday(&start, NULL);
 306                 for (n = 0; n < count; n++)
 307                         pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
 308                 for (n = 0; n < count; n++)
 309                         pthread_join(threads[n].thread, NULL);
 310                 gettimeofday(&end, NULL);
 311
 312                 tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
 313                 igt_info("Contended upload rate for %d tiled threads:   %7.3fMiB/s\n", count, tiled[count != 2]);
 314
 315                 for (n = 0; n < count; n++) {
 316                         gem_close(fd, threads[n].handle);
 317                 }
 318         }
 319
 320         errno = 0;
 321         igt_assert(linear[1] > 0.75 * linear[0]);
 322         igt_assert(tiled[1] > 0.75 * tiled[0]);
 323 }
 324
 325 igt_main
 326 {
 327         igt_skip_on_simulation();
 328
 329         igt_subtest("performance")
 330                 performance();
 331         igt_subtest("thread-contention")
 332                 thread_contention();
 333         igt_subtest("thread-performance-read")
 334                 thread_performance(READ);
 335         igt_subtest("thread-performance-write")
 336                 thread_performance(WRITE);
 337         igt_subtest("thread-performance-both")
 338                 thread_performance(READ | WRITE);
 339 }