cutils: Add generic prefetch
author Richard Henderson <rth@twiddle.net>
Mon, 29 Aug 2016 18:46:17 +0000 (11:46 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 13 Sep 2016 17:13:32 +0000 (19:13 +0200)
The code has no real knowledge of the cacheline size;
it simply prefetches one loop iteration ahead.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1472496380-19706-7-git-send-email-rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
util/bufferiszero.c

index bafd3d1..abe65f9 100644 (file)
@@ -38,6 +38,8 @@ static bool NAME(const void *buf, size_t len)                   \
     do {                                                        \
         const VECTYPE *p = buf;                                 \
         VECTYPE t;                                              \
+        __builtin_prefetch(buf + SIZE);                         \
+        barrier();                                              \
         if (SIZE == sizeof(VECTYPE) * 4) {                      \
             t = (p[0] | p[1]) | (p[2] | p[3]);                  \
         } else if (SIZE == sizeof(VECTYPE) * 8) {               \
@@ -219,6 +221,9 @@ bool buffer_is_zero(const void *buf, size_t len)
         return true;
     }
 
+    /* Fetch the beginning of the buffer while we select the accelerator.  */
+    __builtin_prefetch(buf);
+
     /* Use an optimized zero check if possible.  Note that this also
        includes a check for an unrolled loop over 64-bit integers.  */
     return select_accel_fn(buf, len);