From 1a80405a36eae6777cf8dc809da5883ba666f41c Mon Sep 17 00:00:00 2001
From: Ryan Lortie
Date: Fri, 3 Jun 2011 20:29:41 +0200
Subject: [PATCH] bitlock: hand-code assembly version for x86

The __sync_fetch_and_or() operation on x86 is a bit suboptimal when the
result isn't ignored.  Normally we could use the 'lock or' assembly
instruction to accomplish this, but this instruction discards the
previous value.

In order to work around this issue, GCC is forced to emit a
compare-and-exchange loop.

We can easily use the 'lock bts' instruction, though.  It can't be used
in the general case for __sync_fetch_and_or() but it works great for
our case (test and set a single bit).

I filed a bug against GCC[1] to get this exposed as a new intrinsic (or
have the optimiser detect the case) but until then we'll hand-code it
on x86 and amd64.

The uncontended case sees a 31% improvement on my test machine.

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49244

https://bugzilla.gnome.org/show_bug.cgi?id=651467
---
 glib/gbitlock.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/glib/gbitlock.c b/glib/gbitlock.c
index 4e91e9a..75e045c 100644
--- a/glib/gbitlock.c
+++ b/glib/gbitlock.c
@@ -205,6 +205,33 @@ void
 g_bit_lock (volatile gint *address,
             gint           lock_bit)
 {
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+ retry:
+  asm volatile goto ("lock bts %1, (%0)\n"
+                     "jc %l[contended]"
+                     : /* no output */
+                     : "r" (address), "r" (lock_bit)
+                     : "cc", "memory"
+                     : contended);
+  return;
+
+ contended:
+  {
+    guint mask = 1u << lock_bit;
+    guint v;
+
+    v = g_atomic_int_get (address);
+    if (v & mask)
+      {
+        guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
+
+        g_atomic_int_add (&g_bit_lock_contended[class], +1);
+        g_futex_wait (address, v);
+        g_atomic_int_add (&g_bit_lock_contended[class], -1);
+      }
+  }
+  goto retry;
+#else
   guint mask = 1u << lock_bit;
   guint v;
 
@@ -221,6 +248,7 @@ g_bit_lock (volatile gint *address,
 
       goto retry;
     }
+#endif
 }
 
 /**
@@ -248,12 +276,25 @@ gboolean
 g_bit_trylock (volatile gint *address,
                gint           lock_bit)
 {
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+  gboolean result;
+
+  asm volatile ("lock bts %2, (%1)\n"
+                "setnc %%al\n"
+                "movzx %%al, %0"
+                : "=r" (result)
+                : "r" (address), "r" (lock_bit)
+                : "cc", "memory");
+
+  return result;
+#else
   guint mask = 1u << lock_bit;
   guint v;
 
   v = g_atomic_int_or (address, mask);
 
   return ~v & mask;
+#endif
 }
 
 /**
@@ -275,11 +316,21 @@ void
 g_bit_unlock (volatile gint *address,
               gint           lock_bit)
 {
-  guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
+#if defined (__GNUC__) && (defined (i386) || defined (__amd64__))
+  asm volatile ("lock btr %1, (%0)"
+                : /* no output */
+                : "r" (address), "r" (lock_bit)
+                : "cc", "memory");
+#else
   guint mask = 1u << lock_bit;
 
   g_atomic_int_and (address, ~mask);
+#endif
+
+  {
+    guint class = ((gsize) address) % G_N_ELEMENTS (g_bit_lock_contended);
 
-  if (g_atomic_int_get (&g_bit_lock_contended[class]))
-    g_futex_wake (address);
+    if (g_atomic_int_get (&g_bit_lock_contended[class]))
+      g_futex_wake (address);
+  }
 }
-- 
2.7.4
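
For illustration, here is a minimal standalone sketch of the trick the
patch relies on; it is not part of the patch.  It assumes GCC or Clang
on i386/amd64, and the function names are made up for this example.
bit_test_and_set_generic() uses __sync_fetch_and_or(), which compiles
to a compare-and-exchange loop because the old value is consumed;
bit_test_and_set_bts() uses 'lock bts', which leaves the old bit value
in the carry flag and needs no loop.  The futex and contention-counter
machinery from gbitlock.c is omitted.

/* Sketch only: single-bit atomic test-and-set, two ways.
 * Assumes GCC/Clang on i386 or amd64.  Build: gcc -O2 bts-sketch.c */
#include <stdio.h>

/* Generic form: fetch the whole old word, then test the bit.  Because
 * the old value is used, the compiler cannot emit a plain "lock or"
 * (which discards the previous value) and falls back to a
 * compare-and-exchange loop. */
static int
bit_test_and_set_generic (volatile int *address, int lock_bit)
{
  unsigned int mask = 1u << lock_bit;

  return (__sync_fetch_and_or (address, mask) & mask) != 0;
}

/* Hand-coded form: "lock bts" atomically sets the bit and leaves its
 * previous value in the carry flag, so no retry loop is needed.  This
 * only works for single-bit masks, which is why it cannot replace
 * __sync_fetch_and_or() in general. */
static int
bit_test_and_set_bts (volatile int *address, int lock_bit)
{
  unsigned char was_set;

  asm volatile ("lock bts %2, (%1)\n"
                "setc %0"          /* carry flag = previous bit value */
                : "=q" (was_set)
                : "r" (address), "r" (lock_bit)
                : "cc", "memory");

  return was_set;
}

int
main (void)
{
  volatile int word = 0;

  printf ("bts, bit 3, first:  %d\n", bit_test_and_set_bts (&word, 3));     /* 0 */
  printf ("bts, bit 3, second: %d\n", bit_test_and_set_bts (&word, 3));     /* 1 */
  printf ("generic, bit 4:     %d\n", bit_test_and_set_generic (&word, 4)); /* 0 */
  printf ("word = 0x%x\n", (unsigned int) word);                            /* 0x18 */

  return 0;
}

Compiling both functions with -O2 -S makes the difference visible: the
generic version contains a "lock cmpxchg" retry loop while the bts
version is a single locked instruction, which is the difference the
commit message describes for the uncontended case.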