tests/bdw: pwrite_pread
[platform/upstream/intel-gpu-tools.git] / tests / gem_pwrite_pread.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27
28 #include <unistd.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <inttypes.h>
35 #include <errno.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/time.h>
39 #include "drm.h"
40 #include "i915_drm.h"
41 #include "drmtest.h"
42 #include "intel_bufmgr.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_gpu_tools.h"
45
/* Default size in bytes of the src/dst objects; main() may override it. */
#define OBJECT_SIZE		(16 * 1024)

/*
 * Blitter command dword and its flag bits, as consumed by build_batch().
 * NOTE(review): the low 0x6 of COPY_BLT_CMD looks like the command's dword
 * length field (total dwords - 2) - confirm against the hardware docs.
 */
#define COPY_BLT_CMD		((2 << 29) | (0x53 << 22) | 0x6)
#define BLT_WRITE_ALPHA		(1 << 21)
#define BLT_WRITE_RGB		(1 << 20)
#define BLT_SRC_TILED		(1 << 15)
#define BLT_DST_TILED		(1 << 11)

/* PCI device id of the GPU under test; read by build_batch() and the copy helpers. */
uint32_t devid;
55
56 static inline void build_batch(uint32_t *batch, int len, uint32_t *batch_len)
57 {
58         unsigned int i = 0;
59
60         batch[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
61         batch[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | len;
62         batch[i++] = 0;
63         batch[i++] = 1 << 16 | (len / 4);
64         batch[i++] = 0; /* dst */
65         if (intel_gen(devid) >= 8)
66                 batch[i++] = 0; /* FIXME */
67         batch[i++] = 0;
68         batch[i++] = len;
69         batch[i++] = 0; /* src */
70         if (intel_gen(devid) >= 8)
71                 batch[i++] = 0; /* FIXME */
72         batch[i++] = MI_BATCH_BUFFER_END;
73         batch[i++] = 0;
74
75         *batch_len = i * 4;
76 }
77
78 #define GPP_BATCH_SIZE (12 * 4)
79
80 static void copy(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
81 {
82         uint32_t batch[GPP_BATCH_SIZE] = {0};
83
84         struct drm_i915_gem_relocation_entry reloc[] = {
85                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
86                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
87         };
88         struct drm_i915_gem_exec_object2 exec[] = {
89                 { src },
90                 { dst },
91                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
92         };
93         struct drm_i915_gem_execbuffer2 execbuf = {
94                 (uintptr_t)exec, 3,
95                 0, 0,
96                 0, 0, 0, 0,
97                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
98         };
99
100         build_batch(batch, len, &execbuf.batch_len);
101
102         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
103
104         while (loops--) {
105                 gem_write(fd, src, 0, buf, len);
106                 do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
107                 gem_read(fd, dst, 0, buf, len);
108         }
109
110         gem_close(fd, exec[2].handle);
111 }
112
113 static void as_gtt_mmap(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
114 {
115         uint32_t batch[GPP_BATCH_SIZE] = {0};
116
117         struct drm_i915_gem_relocation_entry reloc[] = {
118                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
119                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
120         };
121         struct drm_i915_gem_exec_object2 exec[] = {
122                 { src },
123                 { dst },
124                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
125         };
126         struct drm_i915_gem_execbuffer2 execbuf = {
127                 (uintptr_t)exec, 3,
128                 0, GPP_BATCH_SIZE,
129                 0, 0, 0, 0,
130                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
131         };
132         uint32_t *src_ptr, *dst_ptr;
133
134         build_batch(batch, len, &execbuf.batch_len);
135
136         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
137
138         src_ptr = gem_mmap__gtt(fd, src, OBJECT_SIZE, PROT_WRITE);
139         dst_ptr = gem_mmap__gtt(fd, dst, OBJECT_SIZE, PROT_READ);
140
141         while (loops--) {
142                 gem_set_domain(fd, src,
143                                I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
144                 memcpy(src_ptr, buf, len);
145
146                 do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
147                 gem_set_domain(fd, dst,
148                                I915_GEM_DOMAIN_GTT, 0);
149                 memcpy(buf, dst_ptr, len);
150         }
151
152         munmap(dst_ptr, len);
153         munmap(src_ptr, len);
154         gem_close(fd, exec[2].handle);
155 }
156
157
158 static void as_cpu_mmap(int fd, uint32_t src, uint32_t dst, void *buf, int len, int loops)
159 {
160         uint32_t batch[GPP_BATCH_SIZE] = {0};
161
162         struct drm_i915_gem_relocation_entry reloc[] = {
163                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
164                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
165         };
166         struct drm_i915_gem_exec_object2 exec[] = {
167                 { src },
168                 { dst },
169                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
170         };
171         struct drm_i915_gem_execbuffer2 execbuf = {
172                 (uintptr_t)exec, 3,
173                 0, GPP_BATCH_SIZE,
174                 0, 0, 0, 0,
175                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
176         };
177         uint32_t *src_ptr, *dst_ptr;
178
179         build_batch(batch, len, &execbuf.batch_len);
180
181         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
182
183         src_ptr = gem_mmap__cpu(fd, src, OBJECT_SIZE, PROT_WRITE);
184         dst_ptr = gem_mmap__cpu(fd, dst, OBJECT_SIZE, PROT_READ);
185
186         while (loops--) {
187                 gem_set_domain(fd, src,
188                                I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
189                 memcpy(src_ptr, buf, len);
190
191                 do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
192                 gem_set_domain(fd, dst,
193                                I915_GEM_DOMAIN_CPU, 0);
194                 memcpy(buf, dst_ptr, len);
195         }
196
197         munmap(dst_ptr, len);
198         munmap(src_ptr, len);
199         gem_close(fd, exec[2].handle);
200 }
201
202 static void test_copy(int fd, uint32_t src, uint32_t dst, uint32_t *buf, int len)
203 {
204         uint32_t batch[GPP_BATCH_SIZE] = {0};
205
206         struct drm_i915_gem_relocation_entry reloc[] = {
207                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
208                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
209         };
210         struct drm_i915_gem_exec_object2 exec[] = {
211                 { src },
212                 { dst },
213                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
214         };
215         struct drm_i915_gem_execbuffer2 execbuf = {
216                 (uintptr_t)exec, 3,
217                 0, GPP_BATCH_SIZE,
218                 0, 0, 0, 0,
219                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
220         };
221         int i;
222
223         build_batch(batch, len, &execbuf.batch_len);
224
225         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
226
227         for (i = 0; i < len/4; i++)
228                 buf[i] = i;
229
230         gem_write(fd, src, 0, buf, len);
231         memset(buf, 0, len);
232
233         do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
234         gem_read(fd, dst, 0, buf, len);
235
236         gem_close(fd, exec[2].handle);
237
238         for (i = 0; i < len/4; i++)
239                 igt_assert(buf[i] == i);
240 }
241
242 static void test_as_gtt_mmap(int fd, uint32_t src, uint32_t dst, int len)
243 {
244         uint32_t batch[GPP_BATCH_SIZE] = {0};
245
246         struct drm_i915_gem_relocation_entry reloc[] = {
247                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
248                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
249         };
250         struct drm_i915_gem_exec_object2 exec[] = {
251                 { src },
252                 { dst },
253                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
254         };
255         struct drm_i915_gem_execbuffer2 execbuf = {
256                 (uintptr_t)exec, 3,
257                 0, GPP_BATCH_SIZE,
258                 0, 0, 0, 0,
259                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
260         };
261         uint32_t *src_ptr, *dst_ptr;
262         int i;
263
264         build_batch(batch, len, &execbuf.batch_len);
265
266         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
267
268         src_ptr = gem_mmap__gtt(fd, src, OBJECT_SIZE, PROT_WRITE);
269         dst_ptr = gem_mmap__gtt(fd, dst, OBJECT_SIZE, PROT_READ);
270
271         gem_set_domain(fd, src, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
272         for (i = 0; i < len/4; i++)
273                 src_ptr[i] = i;
274
275         do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
276         gem_close(fd, exec[2].handle);
277
278         gem_set_domain(fd, dst, I915_GEM_DOMAIN_GTT, 0);
279         for (i = 0; i < len/4; i++)
280                 igt_assert(dst_ptr[i] == i);
281
282         munmap(dst_ptr, len);
283         munmap(src_ptr, len);
284 }
285
286 static void test_as_cpu_mmap(int fd, uint32_t src, uint32_t dst, int len)
287 {
288         uint32_t batch[GPP_BATCH_SIZE] = {0};
289
290         struct drm_i915_gem_relocation_entry reloc[] = {
291                 { dst, 0, 4*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER },
292                 { src, 0, 7*sizeof(uint32_t), 0, I915_GEM_DOMAIN_RENDER, 0 },
293         };
294         struct drm_i915_gem_exec_object2 exec[] = {
295                 { src },
296                 { dst },
297                 { gem_create(fd, 4096), 2, (uintptr_t)reloc }
298         };
299         struct drm_i915_gem_execbuffer2 execbuf = {
300                 (uintptr_t)exec, 3,
301                 0, GPP_BATCH_SIZE,
302                 0, 0, 0, 0,
303                 HAS_BLT_RING(devid) ? I915_EXEC_BLT : 0,
304         };
305         uint32_t *src_ptr, *dst_ptr;
306         int i;
307
308         build_batch(batch, len, &execbuf.batch_len);
309
310         gem_write(fd, exec[2].handle, 0, batch, execbuf.batch_len);
311
312         src_ptr = gem_mmap__cpu(fd, src, OBJECT_SIZE, PROT_WRITE);
313         dst_ptr = gem_mmap__cpu(fd, dst, OBJECT_SIZE, PROT_READ);
314
315         gem_set_domain(fd, src, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
316         for (i = 0; i < len/4; i++)
317                 src_ptr[i] = i;
318
319         do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
320         gem_close(fd, exec[2].handle);
321
322         gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
323         for (i = 0; i < len/4; i++)
324                 igt_assert(dst_ptr[i] == i);
325
326         munmap(dst_ptr, len);
327         munmap(src_ptr, len);
328 }
329
/*
 * Average duration of one iteration, in microseconds: the wall-clock span
 * from @start to @end divided by @loop.
 */
static double elapsed(const struct timeval *start,
		      const struct timeval *end,
		      int loop)
{
	double total_usecs;

	total_usecs = 1e6 * (end->tv_sec - start->tv_sec);
	total_usecs += end->tv_usec - start->tv_usec;

	return total_usecs / loop;
}
336
/*
 * Format the rate @v (bytes per second) into @buf as "<value><unit>/s" with
 * one decimal place, and return @buf.
 *
 * The value is divided by 1000 for each unit step while it exceeds 1000,
 * stopping at the largest unit.  Note that the steps are decimal even
 * though the labels read KiB/MiB/GiB/TiB.  @buf has no length argument, so
 * the caller must supply a buffer large enough for the formatted text.
 */
static const char *bytes_per_sec(char *buf, double v)
{
	static const char *const unit[] = { "", "KiB", "MiB", "GiB", "TiB" };
	const unsigned int last = sizeof(unit) / sizeof(unit[0]) - 1;
	unsigned int step;

	for (step = 0; v > 1000 && step < last; step++)
		v /= 1000;

	sprintf(buf, "%.1f%s/s", v, unit[step]);
	return buf;
}
355
356 uint32_t *tmp, src, dst;
357 int fd;
358
359 int main(int argc, char **argv)
360 {
361         int object_size = 0;
362         uint32_t buf[20];
363         int count;
364
365         igt_subtest_init(argc, argv);
366         igt_skip_on_simulation();
367
368         if (argc > 1)
369                 object_size = atoi(argv[1]);
370         if (object_size == 0)
371                 object_size = OBJECT_SIZE;
372         object_size = (object_size + 3) & -4;
373
374         igt_fixture {
375                 fd = drm_open_any();
376
377                 dst = gem_create(fd, object_size);
378                 src = gem_create(fd, object_size);
379                 tmp = malloc(object_size);
380
381                 gem_set_caching(fd, src, 0);
382                 gem_set_caching(fd, dst, 0);
383         }
384
385         devid = intel_get_drm_devid(fd);
386
387         igt_subtest("uncached-copy-correctness")
388                 test_copy(fd, src, dst, tmp, object_size);
389         igt_subtest("uncached-copy-performance") {
390                 for (count = 1; count <= 1<<17; count <<= 1) {
391                         struct timeval start, end;
392
393                         gettimeofday(&start, NULL);
394                         copy(fd, src, dst, tmp, object_size, count);
395                         gettimeofday(&end, NULL);
396                         printf("Time to uncached copy %d bytes x %6d:   %7.3fµs, %s\n",
397                                object_size, count,
398                                elapsed(&start, &end, count),
399                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
400                         fflush(stdout);
401                 }
402         }
403
404         igt_subtest("uncached-pwrite-blt-gtt_mmap-correctness")
405                 test_as_gtt_mmap(fd, src, dst, object_size);
406         igt_subtest("uncached-pwrite-blt-gtt_mmap-performance") {
407                 for (count = 1; count <= 1<<17; count <<= 1) {
408                         struct timeval start, end;
409
410                         gettimeofday(&start, NULL);
411                         as_gtt_mmap(fd, src, dst, tmp, object_size, count);
412                         gettimeofday(&end, NULL);
413                         printf("** mmap uncached copy %d bytes x %6d:   %7.3fµs, %s\n",
414                                object_size, count,
415                                elapsed(&start, &end, count),
416                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
417                         fflush(stdout);
418                 }
419         }
420
421         igt_fixture {
422                 gem_set_caching(fd, src, 1);
423                 gem_set_caching(fd, dst, 1);
424         }
425
426         igt_subtest("snooped-copy-correctness")
427                 test_copy(fd, src, dst, tmp, object_size);
428         igt_subtest("snooped-copy-performance") {
429                 for (count = 1; count <= 1<<17; count <<= 1) {
430                         struct timeval start, end;
431
432                         gettimeofday(&start, NULL);
433                         copy(fd, src, dst, tmp, object_size, count);
434                         gettimeofday(&end, NULL);
435                         printf("Time to snooped copy %d bytes x %6d:    %7.3fµs, %s\n",
436                                object_size, count,
437                                elapsed(&start, &end, count),
438                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
439                         fflush(stdout);
440                 }
441         }
442
443         igt_subtest("snooped-pwrite-blt-cpu_mmap-correctness")
444                 test_as_cpu_mmap(fd, src, dst, object_size);
445         igt_subtest("snooped-pwrite-blt-cpu_mmap-performance") {
446                 for (count = 1; count <= 1<<17; count <<= 1) {
447                         struct timeval start, end;
448
449                         gettimeofday(&start, NULL);
450                         as_cpu_mmap(fd, src, dst, tmp, object_size, count);
451                         gettimeofday(&end, NULL);
452                         printf("** mmap snooped copy %d bytes x %6d:    %7.3fµs, %s\n",
453                                object_size, count,
454                                elapsed(&start, &end, count),
455                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
456                         fflush(stdout);
457                 }
458         }
459
460         igt_fixture {
461                 gem_set_caching(fd, src, 2);
462                 gem_set_caching(fd, dst, 2);
463         }
464
465         igt_subtest("display-copy-correctness")
466                 test_copy(fd, src, dst, tmp, object_size);
467         igt_subtest("display-copy-performance") {
468                 for (count = 1; count <= 1<<17; count <<= 1) {
469                         struct timeval start, end;
470
471                         gettimeofday(&start, NULL);
472                         copy(fd, src, dst, tmp, object_size, count);
473                         gettimeofday(&end, NULL);
474                         printf("Time to display copy %d bytes x %6d:    %7.3fµs, %s\n",
475                                object_size, count,
476                                elapsed(&start, &end, count),
477                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
478                         fflush(stdout);
479                 }
480         }
481
482         igt_subtest("display-pwrite-blt-gtt_mmap-correctness")
483                 test_as_gtt_mmap(fd, src, dst, object_size);
484         igt_subtest("display-pwrite-blt-gtt_mmap-performance") {
485                 for (count = 1; count <= 1<<17; count <<= 1) {
486                         struct timeval start, end;
487
488                         gettimeofday(&start, NULL);
489                         as_gtt_mmap(fd, src, dst, tmp, object_size, count);
490                         gettimeofday(&end, NULL);
491                         printf("** mmap display copy %d bytes x %6d:    %7.3fµs, %s\n",
492                                object_size, count,
493                                elapsed(&start, &end, count),
494                                bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
495                         fflush(stdout);
496                 }
497         }
498
499         igt_fixture {
500                 free(tmp);
501                 gem_close(fd, src);
502                 gem_close(fd, dst);
503
504                 close(fd);
505         }
506
507         igt_exit();
508 }