2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Chris Wilson <chris@chris-wilson.co.uk>
44 #include "ioctl_wrappers.h"
/* Size in bytes (1024*1024) passed to every gem_create/gem_mmap/munmap call visible in this file. */
46 #define OBJECT_SIZE (1024*1024) /* restricted to 1MiB alignment on i915 fences */
/*
 * Return the time from *start to *end in seconds, as a double.
 *
 * The result is the tv_sec difference plus the tv_usec difference scaled
 * by 1e-6; a negative tv_usec difference simply subtracts from the
 * whole-second part.
 */
48 static double elapsed(const struct timeval *start,
49 const struct timeval *end)
51 return (end->tv_sec - start->tv_sec) + 1e-6*(end->tv_usec - start->tv_usec);
/*
 * Single-threaded benchmark: time memset() over `count` mapped GEM objects,
 * first as created and then again after gem_set_tiling(I915_TILING_X), for
 * a doubling number of objects, and assert that the rate does not collapse
 * as the object count grows.
 *
 * NOTE(review): several declarations (fd, n, loop, count, ptr, ...) and the
 * block delimiters are on lines not visible in this excerpt.
 */
54 static void performance(void)
/* Slot [0] holds the count == 2 result, slot [1] the most recent other count. */
58 double linear[2], tiled[2];
/* Needs at least one fence; igt_require presumably skips the test otherwise - confirm. */
62 num_fences = gem_available_fences(fd);
63 igt_require(num_fences > 0);
/* count doubles from 2 for as long as it stays below 4*num_fences. */
65 for (count = 2; count < 4*num_fences; count *= 2) {
66 struct timeval start, end;
67 uint32_t handle[count];
/* Create and map `count` objects of OBJECT_SIZE bytes each. */
70 for (n = 0; n < count; n++) {
71 handle[n] = gem_create(fd, OBJECT_SIZE);
72 ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
/* Timed section: 1024 passes, each zeroing every mapping in full. */
76 gettimeofday(&start, NULL);
77 for (loop = 0; loop < 1024; loop++) {
78 for (n = 0; n < count; n++)
79 memset(ptr[n], 0, OBJECT_SIZE);
81 gettimeofday(&end, NULL);
/*
 * `loop` is 1024 once the loop above finishes, so the numerator is the
 * number of whole-object memsets performed; with OBJECT_SIZE being 1MiB
 * the quotient is the MiB/s figure printed below. `count != 2` evaluates
 * to 0 for the first pass and 1 for all later ones.
 */
83 linear[count != 2] = count * loop / elapsed(&start, &end);
84 igt_info("Upload rate for %d linear surfaces: %7.3fMiB/s\n", count, linear[count != 2]);
/* Switch every object to X tiling (1024 is presumably the stride - confirm). */
86 for (n = 0; n < count; n++)
87 gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
/* Same timed workload again, writing through the same ptr[n] mappings. */
89 gettimeofday(&start, NULL);
90 for (loop = 0; loop < 1024; loop++) {
91 for (n = 0; n < count; n++)
92 memset(ptr[n], 0, OBJECT_SIZE);
94 gettimeofday(&end, NULL);
96 tiled[count != 2] = count * loop / elapsed(&start, &end);
97 igt_info("Upload rate for %d tiled surfaces: %7.3fMiB/s\n", count, tiled[count != 2]);
/* Release this pass's mappings and objects before the next count. */
99 for (n = 0; n < count; n++) {
100 munmap(ptr[n], OBJECT_SIZE);
101 gem_close(fd, handle[n]);
/*
 * The last stored rate ([1]) must exceed 75% of the count == 2 rate ([0]).
 * NOTE(review): if the loop only ever runs with count == 2 (e.g.
 * num_fences == 1), slot [1] is never written by the visible code before
 * being read here - confirm whether that case is handled elsewhere.
 */
107 igt_assert(linear[1] > 0.75 * linear[0]);
108 igt_assert(tiled[1] > 0.75 * tiled[0]);
/*
 * Per-thread argument block handed (as the pthread closure) to
 * read_thread_performance() and write_thread_performance().
 *
 * NOTE(review): the code also uses `.thread` and `.ptr` members whose
 * declarations are on lines not visible in this excerpt.
 */
111 struct thread_performance {
/*
 * count: number of entries in ptr (workers index with rand() % count);
 * loops: iterations each worker performs;
 * direction: set to READ or WRITE by thread_performance();
 * id: assigned by thread_performance(), not read by the visible workers.
 */
113 int id, count, direction, loops;
/*
 * Reader worker: for t->loops iterations, pick a random mapping from t->ptr
 * and a random 4096-byte page inside it, then walk that page one uint32_t
 * at a time.
 *
 * closure: pointer to this thread's struct thread_performance.
 * Returns `x` cast through uintptr_t to void *.
 *
 * NOTE(review): the declaration of `x` and the body of the inner loop are
 * on lines not visible here; presumably the loop accumulates src[m] into x
 * - confirm.
 */
117 static void *read_thread_performance(void *closure)
119 struct thread_performance *t = closure;
123 for (n = 0; n < t->loops; n++) {
124 uint32_t *src = t->ptr[rand() % t->count];
/* Page index 0..255; 4096 / 4 converts the byte offset to uint32_t units. */
125 src += (rand() % 256) * 4096 / 4;
/* 4096/4 = 1024 words, i.e. exactly one 4096-byte page. */
126 for (m = 0; m < 4096/4; m++)
130 return (void *)(uintptr_t)x;
/*
 * Writer worker: for t->loops iterations, pick a random mapping from t->ptr
 * and zero one randomly chosen 4096-byte page inside it.
 *
 * closure: pointer to this thread's struct thread_performance.
 * NOTE(review): the return statement is on a line not visible here.
 */
133 static void *write_thread_performance(void *closure)
135 struct thread_performance *t = closure;
138 for (n = 0; n < t->loops; n++) {
139 uint32_t *dst = t->ptr[rand() % t->count];
/* Page index 0..255; 4096 / 4 converts the byte offset to uint32_t units. */
140 dst += (rand() % 256) * 4096 / 4;
141 memset(dst, 0, 4096);
/*
 * Map a READ/WRITE direction mask to the label used at the start of the
 * rate messages logged by thread_performance().
 *
 * NOTE(review): the switch header is on a line not visible here;
 * presumably it switches on `mask` - confirm.
 */
149 static const char *direction_string(unsigned mask)
152 case READ: return "Download";
153 case WRITE: return "Upload";
154 case READ | WRITE: return "Combined";
/* Any other value, including a mask with neither bit set. */
155 default: return "Unknown";
/*
 * Multi-threaded benchmark: for a doubling number of mapped GEM objects,
 * run reader and/or writer threads (selected by `mask`) against the shared
 * mappings, first as created and then after gem_set_tiling(I915_TILING_X),
 * log the aggregate rate, and assert that it does not collapse as the
 * count grows.
 *
 * mask: combination of READ and WRITE; it selects the label printed and
 *       how many threads are counted in the rate.
 *
 * NOTE(review): several declarations (fd, n, count, ptr, ...) and the
 * block delimiters are on lines not visible in this excerpt.
 */
158 static void thread_performance(unsigned mask)
/* Iterations performed by each worker thread. */
160 const int loops = 4096;
/* Slot [0] holds the count == 2 result, slot [1] the most recent other count. */
163 double linear[2], tiled[2];
/* Needs at least one fence; igt_require presumably skips the test otherwise - confirm. */
167 num_fences = gem_available_fences(fd);
168 igt_require(num_fences > 0);
/* count doubles from 2 for as long as it stays below 4*num_fences. */
170 for (count = 2; count < 4*num_fences; count *= 2) {
/* `count` threads for each direction bit that is set in mask. */
171 const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0);
172 struct timeval start, end;
173 struct thread_performance readers[count];
174 struct thread_performance writers[count];
175 uint32_t handle[count];
/* Create and map one object per slot, then fill in both argument blocks. */
178 for (n = 0; n < count; n++) {
179 handle[n] = gem_create(fd, OBJECT_SIZE);
180 ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
/* Every reader shares the whole ptr array rather than owning one mapping. */
185 readers[n].direction = READ;
186 readers[n].ptr = ptr;
187 readers[n].count = count;
188 readers[n].loops = loops;
/* Writers get ids in the reverse order of n. */
192 writers[n].id = count - n - 1;
193 writers[n].direction = WRITE;
194 writers[n].ptr = ptr;
195 writers[n].count = count;
196 writers[n].loops = loops;
/*
 * Timed section: start all workers, then wait for all of them.
 * NOTE(review): the lines between the create/join calls are not visible;
 * presumably each call is guarded by the matching mask bit - confirm.
 */
200 gettimeofday(&start, NULL);
201 for (n = 0; n < count; n++) {
203 pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
205 pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
207 for (n = 0; n < count; n++) {
209 pthread_join(readers[n].thread, NULL);
211 pthread_join(writers[n].thread, NULL);
213 gettimeofday(&end, NULL);
/*
 * Each worker iteration touches one 4096-byte page, so nthreads * loops is
 * the total page count; dividing by the pages per object
 * (OBJECT_SIZE / 4096) expresses the rate in OBJECT_SIZE units per second,
 * which the message prints as MiB/s.
 */
215 linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
216 igt_info("%s rate for %d linear surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]);
/* Switch every object to X tiling (1024 is presumably the stride - confirm). */
218 for (n = 0; n < count; n++)
219 gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);
/* Same threaded workload again, using the same argument blocks and mappings. */
221 gettimeofday(&start, NULL);
222 for (n = 0; n < count; n++) {
224 pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
226 pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
228 for (n = 0; n < count; n++) {
230 pthread_join(readers[n].thread, NULL);
232 pthread_join(writers[n].thread, NULL);
234 gettimeofday(&end, NULL);
236 tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
237 igt_info("%s rate for %d tiled surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]);
/* Release this pass's mappings and objects before the next count. */
239 for (n = 0; n < count; n++) {
240 munmap(ptr[n], OBJECT_SIZE);
241 gem_close(fd, handle[n]);
/*
 * The last stored rate ([1]) must exceed 75% of the count == 2 rate ([0]).
 * NOTE(review): if the loop only ever runs with count == 2, slot [1] is
 * never written by the visible code before being read here - confirm.
 */
246 igt_assert(linear[1] > 0.75 * linear[0]);
247 igt_assert(tiled[1] > 0.75 * tiled[0]);
/*
 * Per-thread argument block for no_contention(). The visible code accesses
 * the members .thread, .handle, .loops and .fd.
 * NOTE(review): the member declarations are on lines not visible here.
 */
250 struct thread_contention {
/*
 * Worker for thread_contention(): for t->loops iterations, map the thread's
 * own object (t->handle on t->fd), zero one randomly chosen 4096-byte page
 * of it, and unmap it again, so every iteration sets up a fresh mapping.
 *
 * closure: pointer to this thread's struct thread_contention.
 * NOTE(review): the return statement is on a line not visible here.
 */
255 static void *no_contention(void *closure)
257 struct thread_contention *t = closure;
260 for (n = 0; n < t->loops; n++) {
261 uint32_t *ptr = gem_mmap(t->fd, t->handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
/* ptr is uint32_t *, so 4096 / 4 turns the page's byte offset into elements. */
263 memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
264 munmap(ptr, OBJECT_SIZE);
/*
 * Threaded map/write/unmap benchmark: for a doubling number of threads,
 * each with its own GEM object, run no_contention() in every thread, first
 * with the objects as created and then after gem_set_tiling(I915_TILING_X),
 * log the aggregate rate, and assert that it does not collapse as the
 * thread count grows.
 *
 * NOTE(review): several declarations (fd, n, count, ...) and the block
 * delimiters are on lines not visible in this excerpt.
 */
270 static void thread_contention(void)
/* Iterations performed by each worker thread. */
272 const int loops = 4096;
/* Slot [0] holds the count == 2 result, slot [1] the most recent other count. */
275 double linear[2], tiled[2];
/* Needs at least one fence; igt_require presumably skips the test otherwise - confirm. */
279 num_fences = gem_available_fences(fd);
280 igt_require(num_fences > 0);
/*
 * Unlike the other benchmarks in this file, count starts at 1, so the
 * first pass stores into slot [1] (count != 2 is true) and the second
 * pass into slot [0].
 */
282 for (count = 1; count < 4*num_fences; count *= 2) {
283 struct timeval start, end;
284 struct thread_contention threads[count];
/* One object per thread. */
286 for (n = 0; n < count; n++) {
287 threads[n].handle = gem_create(fd, OBJECT_SIZE);
288 threads[n].loops = loops;
/* Timed section: start all workers, then wait for all of them. */
292 gettimeofday(&start, NULL);
293 for (n = 0; n < count; n++)
294 pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
295 for (n = 0; n < count; n++)
296 pthread_join(threads[n].thread, NULL);
297 gettimeofday(&end, NULL);
/*
 * count * loops pages of 4096 bytes were written; dividing by the pages
 * per object (OBJECT_SIZE / 4096) expresses the rate in OBJECT_SIZE units
 * per second, which the message prints as MiB/s.
 */
299 linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
300 igt_info("Contended upload rate for %d linear threads: %7.3fMiB/s\n", count, linear[count != 2]);
/* Switch every object to X tiling (1024 is presumably the stride - confirm). */
302 for (n = 0; n < count; n++)
303 gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);
/* Same threaded workload again on the now-tiled objects. */
305 gettimeofday(&start, NULL);
306 for (n = 0; n < count; n++)
307 pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
308 for (n = 0; n < count; n++)
309 pthread_join(threads[n].thread, NULL);
310 gettimeofday(&end, NULL);
312 tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
313 igt_info("Contended upload rate for %d tiled threads: %7.3fMiB/s\n", count, tiled[count != 2]);
/* Release this pass's objects before the next count. */
315 for (n = 0; n < count; n++) {
316 gem_close(fd, threads[n].handle);
/*
 * The last stored rate ([1]) must exceed 75% of the count == 2 rate ([0]).
 * NOTE(review): if the loop never reaches count == 2 (e.g.
 * num_fences == 1 runs only count == 1 if 4*num_fences is 2 or less, or
 * stops early), slot [0] is not written by the visible code - confirm.
 */
321 igt_assert(linear[1] > 0.75 * linear[0]);
322 igt_assert(tiled[1] > 0.75 * tiled[0]);
/*
 * Test entry point body: registers the five subtests of this file.
 * NOTE(review): the enclosing function/macro header is on a line not
 * visible in this excerpt.
 */
327 igt_skip_on_simulation();
/*
 * NOTE(review): the statements run by the first two subtests are on lines
 * not visible here; presumably they call performance() and
 * thread_contention() respectively - confirm.
 */
329 igt_subtest("performance")
331 igt_subtest("thread-contention")
/* The three threaded variants differ only in the direction mask passed. */
333 igt_subtest("thread-performance-read")
334 thread_performance(READ);
335 igt_subtest("thread-performance-write")
336 thread_performance(WRITE);
337 igt_subtest("thread-performance-both")
338 thread_performance(READ | WRITE);