igt/gem_userptr_blits: Fix forked access test
[platform/upstream/intel-gpu-tools.git] / tests / gem_exec_blt.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27
28 #include <unistd.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <inttypes.h>
35 #include <errno.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/time.h>
39 #include "drm.h"
40 #include "ioctl_wrappers.h"
41 #include "drmtest.h"
42 #include "intel_chipset.h"
43 #include "intel_io.h"
44
/* Size in bytes of the copied objects when no size is given on the command line. */
#define OBJECT_SIZE 16384

/*
 * First dword of the blitter copy command built by gem_linear_blt(), and
 * the flag bits that get OR'ed into it.  The two *_TILED flags are not
 * referenced by the code visible in this file.
 */
#define COPY_BLT_CMD            ((2 << 29) | (0x53 << 22) | 0x6)
#define BLT_WRITE_ALPHA         (1 << 21)
#define BLT_WRITE_RGB           (1 << 20)
#define BLT_SRC_TILED           (1 << 15)
#define BLT_DST_TILED           (1 << 11)
52
53 static int gem_linear_blt(int fd,
54                           uint32_t *batch,
55                           uint32_t src,
56                           uint32_t dst,
57                           uint32_t length,
58                           struct drm_i915_gem_relocation_entry *reloc)
59 {
60         uint32_t *b = batch;
61         int height = length / (16 * 1024);
62
63         igt_assert(height <= 1<<16);
64
65         if (height) {
66                 int i = 0;
67                 b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
68                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
69                         b[i-1]+=2;
70                 b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
71                 b[i++] = 0;
72                 b[i++] = height << 16 | (4*1024);
73                 b[i++] = 0;
74                 reloc->offset = (b-batch+4) * sizeof(uint32_t);
75                 reloc->delta = 0;
76                 reloc->target_handle = dst;
77                 reloc->read_domains = I915_GEM_DOMAIN_RENDER;
78                 reloc->write_domain = I915_GEM_DOMAIN_RENDER;
79                 reloc->presumed_offset = 0;
80                 reloc++;
81                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
82                         b[i++] = 0; /* FIXME */
83
84                 b[i++] = 0;
85                 b[i++] = 16*1024;
86                 b[i++] = 0;
87                 reloc->offset = (b-batch+7) * sizeof(uint32_t);
88                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
89                         reloc->offset += sizeof(uint32_t);
90                 reloc->delta = 0;
91                 reloc->target_handle = src;
92                 reloc->read_domains = I915_GEM_DOMAIN_RENDER;
93                 reloc->write_domain = 0;
94                 reloc->presumed_offset = 0;
95                 reloc++;
96                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
97                         b[i++] = 0; /* FIXME */
98
99                 b += i;
100                 length -= height * 16*1024;
101         }
102
103         if (length) {
104                 int i = 0;
105                 b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
106                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
107                         b[i-1]+=2;
108                 b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
109                 b[i++] = height << 16;
110                 b[i++] = (1+height) << 16 | (length / 4);
111                 b[i++] = 0;
112                 reloc->offset = (b-batch+4) * sizeof(uint32_t);
113                 reloc->delta = 0;
114                 reloc->target_handle = dst;
115                 reloc->read_domains = I915_GEM_DOMAIN_RENDER;
116                 reloc->write_domain = I915_GEM_DOMAIN_RENDER;
117                 reloc->presumed_offset = 0;
118                 reloc++;
119                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
120                         b[i++] = 0; /* FIXME */
121
122                 b[i++] = height << 16;
123                 b[i++] = 16*1024;
124                 b[i++] = 0;
125                 reloc->offset = (b-batch+7) * sizeof(uint32_t);
126                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
127                         reloc->offset += sizeof(uint32_t);
128                 reloc->delta = 0;
129                 reloc->target_handle = src;
130                 reloc->read_domains = I915_GEM_DOMAIN_RENDER;
131                 reloc->write_domain = 0;
132                 reloc->presumed_offset = 0;
133                 reloc++;
134                 if (intel_gen(intel_get_drm_devid(fd)) >= 8)
135                         b[i++] = 0; /* FIXME */
136
137                 b += i;
138         }
139
140         b[0] = MI_BATCH_BUFFER_END;
141         b[1] = 0;
142
143         return (b+2 - batch) * sizeof(uint32_t);
144 }
145
/*
 * Average duration of one iteration, in microseconds.
 *
 * start, end: timestamps taken before and after the measured loop
 * loop:       number of iterations the interval covers
 */
static double elapsed(const struct timeval *start,
                      const struct timeval *end,
                      int loop)
{
        const double whole_us = 1e6*(end->tv_sec - start->tv_sec);
        const double frac_us = end->tv_usec - start->tv_usec;

        return (whole_us + frac_us)/loop;
}
152
153 static const char *bytes_per_sec(char *buf, double v)
154 {
155         const char *order[] = {
156                 "",
157                 "KiB",
158                 "MiB",
159                 "GiB",
160                 "TiB",
161                 "PiB",
162                 NULL,
163         }, **o = order;
164
165         while (v > 1024 && o[1]) {
166                 v /= 1024;
167                 o++;
168         }
169         sprintf(buf, "%.1f%s/s", v, *o);
170         return buf;
171 }
172
173 static void run(int object_size)
174 {
175         struct drm_i915_gem_execbuffer2 execbuf;
176         struct drm_i915_gem_exec_object2 exec[3];
177         struct drm_i915_gem_relocation_entry reloc[4];
178         uint32_t buf[20];
179         uint32_t handle, src, dst;
180         int fd, len, count;
181         int ring;
182
183         fd = drm_open_any();
184         handle = gem_create(fd, 4096);
185         src = gem_create(fd, object_size);
186         dst = gem_create(fd, object_size);
187
188         len = gem_linear_blt(fd, buf, src, dst, object_size, reloc);
189         gem_write(fd, handle, 0, buf, len);
190
191         exec[0].handle = src;
192         exec[0].relocation_count = 0;
193         exec[0].relocs_ptr = 0;
194         exec[0].alignment = 0;
195         exec[0].offset = 0;
196         exec[0].flags = 0;
197         exec[0].rsvd1 = 0;
198         exec[0].rsvd2 = 0;
199
200         exec[1].handle = dst;
201         exec[1].relocation_count = 0;
202         exec[1].relocs_ptr = 0;
203         exec[1].alignment = 0;
204         exec[1].offset = 0;
205         exec[1].flags = 0;
206         exec[1].rsvd1 = 0;
207         exec[1].rsvd2 = 0;
208
209         exec[2].handle = handle;
210         if (intel_gen(intel_get_drm_devid(fd)) >= 8)
211                 exec[2].relocation_count = len > 56 ? 4 : 2;
212         else
213                 exec[2].relocation_count = len > 40 ? 4 : 2;
214         exec[2].relocs_ptr = (uintptr_t)reloc;
215         exec[2].alignment = 0;
216         exec[2].offset = 0;
217         exec[2].flags = 0;
218         exec[2].rsvd1 = 0;
219         exec[2].rsvd2 = 0;
220
221         ring = 0;
222         if (HAS_BLT_RING(intel_get_drm_devid(fd)))
223                 ring = I915_EXEC_BLT;
224
225         execbuf.buffers_ptr = (uintptr_t)exec;
226         execbuf.buffer_count = 3;
227         execbuf.batch_start_offset = 0;
228         execbuf.batch_len = len;
229         execbuf.cliprects_ptr = 0;
230         execbuf.num_cliprects = 0;
231         execbuf.DR1 = 0;
232         execbuf.DR4 = 0;
233         execbuf.flags = ring;
234         i915_execbuffer2_set_context_id(execbuf, 0);
235         execbuf.rsvd2 = 0;
236
237         for (count = 1; count <= 1<<12; count <<= 1) {
238                 struct timeval start, end;
239
240                 gettimeofday(&start, NULL);
241                 for (int loop = 0; loop < count; loop++)
242                         gem_execbuf(fd, &execbuf);
243                 gem_sync(fd, handle);
244                 gettimeofday(&end, NULL);
245                 igt_info("Time to blt %d bytes x %6d:   %7.3fµs, %s\n",
246                          object_size, count,
247                          elapsed(&start, &end, count),
248                          bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6));
249                 fflush(stdout);
250         }
251         gem_close(fd, handle);
252
253         close(fd);
254 }
255
256 int main(int argc, char **argv)
257 {
258         int i;
259
260         igt_simple_init(argc, argv);
261
262         igt_skip_on_simulation();
263
264         if (argc > 1) {
265                 for (i = 1; i < argc; i++) {
266                         int object_size = atoi(argv[i]);
267                         if (object_size)
268                                 run((object_size + 3) & -4);
269                 }
270         } else
271                 run(OBJECT_SIZE);
272
273         return 0;
274 }