Optimize __libc_lock_lock and __libc_lock_trylock for MIPS.
author    Maxim Kuvyrkov <maxim@codesourcery.com>
          Wed, 15 Aug 2012 23:44:30 +0000 (16:44 -0700)
committer Maxim Kuvyrkov <maxim@codesourcery.com>
          Wed, 15 Aug 2012 23:44:30 +0000 (16:44 -0700)
nptl/ChangeLog
nptl/sysdeps/pthread/bits/libc-lockP.h
ports/ChangeLog.mips
ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 0f31b4d..545b2c2 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,9 @@
+2012-08-15  Tom de Vries  <vries@codesourcery.com>
+           Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+       * sysdeps/pthread/bits/libc-lockP.h (__libc_lock_lock)
+       (__libc_lock_trylock): Allow pre-existing definitions.
+
 2012-08-15  Maxim Kuvyrkov  <maxim@codesourcery.com>
 
        * pthread_spin_lock.c: New file.
diff --git a/nptl/sysdeps/pthread/bits/libc-lockP.h b/nptl/sysdeps/pthread/bits/libc-lockP.h
index 0ebac91..7adaeb4 100644
--- a/nptl/sysdeps/pthread/bits/libc-lockP.h
+++ b/nptl/sysdeps/pthread/bits/libc-lockP.h
@@ -176,9 +176,12 @@ typedef pthread_key_t __libc_key_t;
 
 /* Lock the named lock variable.  */
 #if !defined NOT_IN_libc || defined IS_IN_libpthread
-# define __libc_lock_lock(NAME) \
+# ifndef __libc_lock_lock
+#  define __libc_lock_lock(NAME) \
   ({ lll_lock (NAME, LLL_PRIVATE); 0; })
+# endif
 #else
+# undef __libc_lock_lock
 # define __libc_lock_lock(NAME) \
   __libc_maybe_call (__pthread_mutex_lock, (&(NAME)), 0)
 #endif
@@ -189,9 +192,12 @@ typedef pthread_key_t __libc_key_t;
 
 /* Try to lock the named lock variable.  */
 #if !defined NOT_IN_libc || defined IS_IN_libpthread
-# define __libc_lock_trylock(NAME) \
+# ifndef __libc_lock_trylock
+#  define __libc_lock_trylock(NAME) \
   lll_trylock (NAME)
+# endif
 #else
+# undef __libc_lock_trylock
 # define __libc_lock_trylock(NAME) \
   __libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)
 #endif
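
The #ifndef/#undef arrangement above is what lets a machine-specific
lowlevellock.h define __libc_lock_lock and __libc_lock_trylock before
libc-lockP.h is read, while the #undef still guarantees the generic
__libc_maybe_call versions outside libc and libpthread.  A minimal
stand-alone sketch of the guard pattern (the puts-based stand-ins are
hypothetical and not glibc code):

/* guard-demo.c -- hypothetical, self-contained demo of the #ifndef
   override pattern; the puts-based macros stand in for the real lock
   expansions.  */
#include <stdio.h>

/* Pretend this came from a sysdeps lowlevellock.h that was included
   first, as the MIPS definitions below are.  */
#define __libc_lock_lock(NAME) ((void) (NAME), puts ("optimized lock"), 0)

/* The generic fallback from libc-lockP.h is now compiled out, because
   the macro already exists -- exactly what the new guard allows.  */
#ifndef __libc_lock_lock
# define __libc_lock_lock(NAME) ((void) (NAME), puts ("generic lock"), 0)
#endif

int
main (void)
{
  int lock = 0;
  return __libc_lock_lock (lock);   /* Prints "optimized lock".  */
}
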
diff --git a/ports/ChangeLog.mips b/ports/ChangeLog.mips
index 5187772..f8c18b3 100644
--- a/ports/ChangeLog.mips
+++ b/ports/ChangeLog.mips
@@ -1,3 +1,9 @@
+2012-08-15  Tom de Vries  <vries@codesourcery.com>
+           Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+       * sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h (__libc_lock_lock)
+       (__libc_lock_trylock): Define versions optimized for MIPS.
+
 2012-08-15  Maxim Kuvyrkov  <maxim@codesourcery.com>
 
        * sysdeps/mips/nptl/pthread_spin_lock.S: Remove, use generic version.
diff --git a/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h b/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
index 88b601e..d368ae1 100644
--- a/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
+++ b/ports/sysdeps/unix/sysv/linux/mips/nptl/lowlevellock.h
@@ -1,5 +1,4 @@
-/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008,
-   2009 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -291,4 +290,40 @@ extern int __lll_timedwait_tid (int *, const struct timespec *)
     __res;                                             \
   })
 
+/* Implement __libc_lock_lock using exchange_and_add, which expands into
+   a single instruction on XLP processors.  We enable this for all MIPS
+   processors as atomic_exchange_and_add_acq and
+   atomic_compare_and_exchange_acq take the same time to execute.
+   This is a simplified expansion of ({ lll_lock (NAME, LLL_PRIVATE); 0; }).
+
+   Note: __lll_lock_wait_private() resets lock value to '2', which prevents
+   unbounded increase of the lock value and [with billions of threads]
+   overflow.  */
+#define __libc_lock_lock(NAME)                                         \
+  ({                                                                   \
+    int *__futex = &(NAME);                                            \
+    if (__builtin_expect (atomic_exchange_and_add_acq (__futex, 1), 0))        \
+      __lll_lock_wait_private (__futex);                               \
+    0;                                                                 \
+  })
+
+#ifdef _MIPS_ARCH_XLP
+/* The generic version using a single atomic_compare_and_exchange_acq takes
+   less time for non-XLP processors, so we use below for XLP only.  */
+# define __libc_lock_trylock(NAME)                                     \
+  ({                                                                   \
+  int *__futex = &(NAME);                                              \
+  int __result = atomic_exchange_and_add_acq (__futex, 1);             \
+  /* If __result == 0, we succeeded in acquiring the lock.             \
+     If __result == 1, we switched the lock to 'contended' state, which        \
+     will cause a [possibly unnecessary] call to lll_futex_wait.  This is \
+     unlikely, so we accept the possible inefficiency.                 \
+     If __result >= 2, we need to set the lock to 'contended' state to avoid \
+     unbounded increase from subsequent trylocks.  */                  \
+  if (__result >= 2)                                                   \
+    __result = atomic_exchange_acq (__futex, 2);                       \
+  __result;                                                            \
+  })
+#endif
+
 #endif /* lowlevellock.h */
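
For readers without the glibc tree at hand, here is a minimal user-space
sketch of the three-state protocol the new macros rely on (0 = unlocked,
1 = locked with no waiters, 2 = locked with possible waiters), built from
GCC __atomic builtins and the raw Linux futex syscall.  The names
xlp_lock, xlp_trylock, xlp_unlock and lock_wait are hypothetical; the
real slow path is glibc's __lll_lock_wait_private and the real fast
paths are the macros above.

/* futex-lock-demo.c -- hypothetical sketch of the 0/1/2 futex lock
   protocol (Linux, GCC/Clang atomic builtins).  */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static long
futex (int *uaddr, int op, int val)
{
  return syscall (SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

/* Slow path, analogous to __lll_lock_wait_private: force the lock to
   the 'contended' value 2 and sleep.  Writing 2 instead of incrementing
   again is what keeps the lock value bounded.  */
static void
lock_wait (int *lock)
{
  while (__atomic_exchange_n (lock, 2, __ATOMIC_ACQUIRE) != 0)
    futex (lock, FUTEX_WAIT_PRIVATE, 2);
}

/* Fetch-and-add fast path, as in __libc_lock_lock above: a single
   atomic instruction on XLP.  An old value of 0 means we own the lock.  */
static void
xlp_lock (int *lock)
{
  if (__atomic_fetch_add (lock, 1, __ATOMIC_ACQUIRE) != 0)
    lock_wait (lock);
}

/* Trylock: returns 0 on success.  An old value of 1 leaves the lock in
   state 2 (a possibly spurious wake at unlock, accepted as cheap); an
   old value >= 2 is swapped back to 2 so that repeated trylocks cannot
   grow the value without bound.  */
static int
xlp_trylock (int *lock)
{
  int old = __atomic_fetch_add (lock, 1, __ATOMIC_ACQUIRE);
  if (old >= 2)
    old = __atomic_exchange_n (lock, 2, __ATOMIC_ACQUIRE);
  return old;
}

static void
xlp_unlock (int *lock)
{
  /* A previous value above 1 means a thread may be asleep: wake one.  */
  if (__atomic_exchange_n (lock, 0, __ATOMIC_RELEASE) > 1)
    futex (lock, FUTEX_WAKE_PRIVATE, 1);
}

int
main (void)
{
  int lock = 0;
  if (xlp_trylock (&lock) == 0)
    xlp_unlock (&lock);
  xlp_lock (&lock);
  xlp_unlock (&lock);
  return 0;
}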