From 5e207f825bd1ed3142a623bcbceca00508907c5e Mon Sep 17 00:00:00 2001 From: Ben Avison Date: Wed, 6 Feb 2013 00:39:12 +0000 Subject: [PATCH] Fix to lowlevel-blt-bench The source, mask and destination buffers are initialised to 0xCC just after they are allocated. Between benchmarks, there is a pair of memcpys, from the destination buffer to the source buffer and back again (there are no explanatory comments, but presumably this is an effort to flush the caches). However, it has an unintended consequence, which is to change the contents of the buffers on entry to subsequent benchmarks. This means it is not a fair test: for example, with over_n_8888 (featured in the following patches) it reports L2 and even M tests as being faster than the L1 test, because after the L1 test, the source buffer is filled with fully opaque pixels, for which over_n_8888 has a shortcut. The fix here is simply to reverse the order of the memcpys, so the source and destination buffers are both filled with 0xCC on entry to all tests. --- test/lowlevel-blt-bench.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c index 8e80b42..4e16f7b 100644 --- a/test/lowlevel-blt-bench.c +++ b/test/lowlevel-blt-bench.c @@ -460,8 +460,8 @@ bench_composite (char * testname, printf ("%24s %c", testname, func != pixman_image_composite_wrapper ? 
'-' : '='); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); l1test_width = L1CACHE_SIZE / 8 - 64; if (l1test_width < 1) @@ -480,8 +480,8 @@ bench_composite (char * testname, ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); nlines = (L2CACHE_SIZE / l1test_width) / ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8); @@ -499,8 +499,8 @@ bench_composite (char * testname, ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (WIDTH * HEIGHT); t1 = gettime (); @@ -515,8 +515,8 @@ bench_composite (char * testname, ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) ); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -529,8 +529,8 @@ bench_composite (char * testname, printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -543,8 +543,8 @@ bench_composite (char * testname, printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -557,8 +557,8 @@ bench_composite (char * testname, printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH); t1 = gettime (); -- 2.7.4