+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+ * sysdeps/unix/sysv/linux/s390/lowlevellock.h
+ (__lll_unlock_elision, lll_unlock_elision): Add adapt_count argument.
+ * sysdeps/unix/sysv/linux/s390/elision-lock.c
+ (__lll_lock_elision): Decrement adapt_count while unlocking
+ instead of before locking.
+ * sysdeps/unix/sysv/linux/s390/elision-trylock.c
+ (__lll_trylock_elision): Likewise.
+ * sysdeps/unix/sysv/linux/s390/elision-unlock.c
+ (__lll_unlock_elision): Likewise.
+
+2016-12-20  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
* sysdeps/unix/sysv/linux/s390/htm.h (__libc_tbegin_retry): New macro.
* sysdeps/unix/sysv/linux/s390/elision-lock.c (__lll_lock_elision):
Use __libc_tbegin_retry macro.
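For reference, the new __libc_tbegin_retry macro used below behaves like a
tbegin loop that retries only transient aborts.  The following is a
simplified sketch under that assumption, not the actual htm.h definition
(the real macro may e.g. issue a processor-assist instruction between
retries); __libc_tbegin, _HTM_TBEGIN_STARTED and _HTM_TBEGIN_TRANSIENT are
taken from the patch context.

  /* Sketch: retry tbegin up to RETRY_CNT times, but only for transient
     aborts; success or a persistent abort is returned immediately.  */
  #define __libc_tbegin_retry_sketch(tdb, retry_cnt)                   \
    ({ int __ret, __retry = (retry_cnt);                               \
       while ((__ret = __libc_tbegin (tdb)) == _HTM_TBEGIN_TRANSIENT   \
              && __retry-- > 0)                                        \
         ;                                                             \
       __ret; })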
critical section uses lock elision) and outside of transactions. Thus,
we need to use atomic accesses to avoid data races. However, the
value of adapt_count is just a hint, so relaxed MO accesses are
- sufficient. */
- if (atomic_load_relaxed (adapt_count) > 0)
- {
- /* Lost updates are possible, but harmless. Due to races this might lead
- to *adapt_count becoming less than zero. */
- atomic_store_relaxed (adapt_count,
- atomic_load_relaxed (adapt_count) - 1);
- goto use_lock;
- }
-
- if (aconf.try_tbegin > 0)
+ sufficient.
+ Do not begin a transaction if another cpu has locked the
+ futex with normal locking. If adapt_count is zero, it remains zero and
+ the next pthread_mutex_lock call will try to start a transaction again. */
+ if (atomic_load_relaxed (futex) == 0
+ && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
{
int status = __libc_tbegin_retry ((void *) 0, aconf.try_tbegin - 1);
if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
_HTM_TBEGIN_STARTED))
{
- if (__builtin_expect (*futex == 0, 1))
+ /* Check the futex to make sure nobody has touched it in the
+ meantime. This forces the futex into the cache and makes
+ sure the transaction aborts if some other cpu uses the
+ lock (writes the futex). */
+ if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
/* Lock was free. Return to user code in a transaction. */
return 0;
/* Lock was busy. Fall back to normal locking. */
- if (__builtin_expect (__libc_tx_nesting_depth (), 1))
+ if (__builtin_expect (__libc_tx_nesting_depth () <= 1, 1))
{
/* In a non-nested transaction there is no need to abort,
- which is expensive. */
+ which is expensive. Simply end the started transaction. */
__libc_tend ();
/* Don't try to use transactions for the next couple of times.
See above for why relaxed MO is sufficient. */
is zero.
The adapt_count of this inner mutex is not changed,
because using the default lock with the inner mutex
- would abort the outer transaction.
- */
+ would abort the outer transaction. */
__libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+ __builtin_unreachable ();
}
}
else if (status != _HTM_TBEGIN_TRANSIENT)
}
else
{
- /* Same logic as above, but for for a number of temporary failures in
- a row. */
+ /* The transaction failed with _HTM_TBEGIN_TRANSIENT for all
+ retries. Use the normal locking now and for the next couple
+ of calls. */
if (aconf.skip_lock_out_of_tbegin_retries > 0)
atomic_store_relaxed (adapt_count,
aconf.skip_lock_out_of_tbegin_retries);
}
}
- use_lock:
/* Use normal locking as fallback path if transaction does not succeed. */
return LLL_LOCK ((*futex), private);
}
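The fast-path order above (futex first, then adapt_count, then tbegin) can
be modeled without HTM.  In this toy C11 sketch, try_elide () is a
hypothetical stand-in for __libc_tbegin_retry plus the in-transaction futex
re-check; it is not a glibc function.  The point it illustrates is that no
transaction is ever begun against a lock that is already held:

  #include <stdatomic.h>
  #include <stdbool.h>

  /* Hypothetical: returns true if the lock was successfully elided.  */
  extern bool try_elide (atomic_int *futex);

  int
  toy_lock_elision (atomic_int *futex, atomic_short *adapt_count)
  {
    /* Same decision order as __lll_lock_elision: the cheap relaxed
       loads come first, the expensive tbegin last.  */
    if (atomic_load_explicit (futex, memory_order_relaxed) == 0
        && atomic_load_explicit (adapt_count, memory_order_relaxed) <= 0
        && try_elide (futex))
      return 0;   /* Running transactionally; lock elided.  */

    /* Fallback: a plain spin loop stands in for the blocking LLL_LOCK.  */
    int expected = 0;
    while (!atomic_compare_exchange_weak_explicit (futex, &expected, 1,
                                                   memory_order_acquire,
                                                   memory_order_relaxed))
      expected = 0;
    return 0;
  }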
until their try_tbegin is zero. */
__libc_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
+ __builtin_unreachable ();
}
- /* Only try a transaction if it's worth it. See __lll_lock_elision for
- why we need atomic accesses. Relaxed MO is sufficient because this is
- just a hint. */
- if (atomic_load_relaxed (adapt_count) <= 0)
+ /* adapt_count can be accessed concurrently; these accesses can be both
+ inside of transactions (if critical sections are nested and the outer
+ critical section uses lock elision) and outside of transactions. Thus,
+ we need to use atomic accesses to avoid data races. However, the
+ value of adapt_count is just a hint, so relaxed MO accesses are
+ sufficient.
+ Do not begin a transaction if another cpu has locked the
+ futex with normal locking. If adapt_count is zero, it remains zero and
+ the next pthread_mutex_lock call will try to start a transaction again. */
+ if (atomic_load_relaxed (futex) == 0
+ && atomic_load_relaxed (adapt_count) <= 0 && aconf.try_tbegin > 0)
{
- int status;
-
- if (__builtin_expect
- ((status = __libc_tbegin ((void *) 0)) == _HTM_TBEGIN_STARTED, 1))
+ int status = __libc_tbegin ((void *) 0);
+ if (__builtin_expect (status == _HTM_TBEGIN_STARTED,
+ _HTM_TBEGIN_STARTED))
{
- if (*futex == 0)
+ /* Check the futex to make sure nobody has touched it in the
+ meantime. This forces the futex into the cache and makes
+ sure the transaction aborts if some other cpu uses the
+ lock (writes the futex). */
+ if (__builtin_expect (atomic_load_relaxed (futex) == 0, 1))
+ /* Lock was free. Return to user code in a transaction. */
return 0;
- /* Lock was busy. Fall back to normal locking. */
- /* Since we are in a non-nested transaction there is no need to abort,
- which is expensive. */
+
+ /* Lock was busy. Fall back to normal locking. Since we are in
+ a non-nested transaction there is no need to abort, which is
+ expensive. Simply end the started transaction. */
__libc_tend ();
/* Note: Changing the adapt_count here might abort a transaction on a
different cpu, but that could happen anyway when the futex is
if (aconf.skip_lock_busy > 0)
atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
}
- else
+ else if (status != _HTM_TBEGIN_TRANSIENT)
{
- if (status != _HTM_TBEGIN_TRANSIENT)
- {
- /* A persistent abort (cc 1 or 3) indicates that a retry is
- probably futile. Use the normal locking now and for the
- next couple of calls.
- Be careful to avoid writing to the lock. */
- if (aconf.skip_trylock_internal_abort > 0)
- *adapt_count = aconf.skip_trylock_internal_abort;
- }
+ /* A persistent abort (cc 1 or 3) indicates that a retry is
+ probably futile. Use the normal locking now and for the
+ next couple of calls.
+ Be careful to avoid writing to the lock. */
+ if (aconf.skip_trylock_internal_abort > 0)
+ *adapt_count = aconf.skip_trylock_internal_abort;
}
/* Could do some retries here. */
}
- else
- {
- /* Lost updates are possible, but harmless. Due to races this might lead
- to *adapt_count becoming less than zero. */
- atomic_store_relaxed (adapt_count,
- atomic_load_relaxed (adapt_count) - 1);
- }
+ /* Use normal locking as fallback path if transaction does not succeed. */
return lll_trylock (*futex);
}
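The trylock path differs from the lock path in two ways that the hunk above
preserves: it makes at most one transaction attempt (plain __libc_tbegin, no
retries, because trylock must not loop), and a persistent abort parks
elision via adapt_count.  A toy C11 model of that policy follows;
try_elide_once and SKIP are hypothetical stand-ins, not glibc interfaces:

  #include <stdatomic.h>

  enum { TOY_STARTED, TOY_TRANSIENT, TOY_PERSISTENT };
  #define SKIP 3   /* Stand-in for aconf.skip_trylock_internal_abort.  */

  /* Hypothetical: one tbegin attempt including the futex re-check and
     tend on failure; returns one of the TOY_* status codes.  */
  extern int try_elide_once (atomic_int *futex);

  int
  toy_trylock_elision (atomic_int *futex, atomic_short *adapt_count)
  {
    if (atomic_load_explicit (futex, memory_order_relaxed) == 0
        && atomic_load_explicit (adapt_count, memory_order_relaxed) <= 0)
      {
        int status = try_elide_once (futex);
        if (status == TOY_STARTED)
          return 0;   /* Lock elided; caller runs transactionally.  */
        if (status == TOY_PERSISTENT)
          /* Retrying is probably futile; skip elision for a while.  */
          atomic_store_explicit (adapt_count, SKIP, memory_order_relaxed);
      }
    /* Normal trylock (stands in for lll_trylock): 0 on success.  */
    int expected = 0;
    return !atomic_compare_exchange_strong_explicit (futex, &expected, 1,
                                                     memory_order_acquire,
                                                     memory_order_relaxed);
  }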
#include <htm.h>
int
-__lll_unlock_elision(int *futex, int private)
+__lll_unlock_elision(int *futex, short *adapt_count, int private)
{
/* If the lock is free, we elided the lock earlier. This does not
necessarily mean that we are in a transaction, because the user code may
- have closed the transaction, but that is impossible to detect reliably. */
- if (*futex == 0)
+ have closed the transaction, but that is impossible to detect reliably.
+ Relaxed MO access to futex is sufficient because we only need a hint as
+ to whether we started a transaction or acquired the futex in e.g.
+ elision-lock.c. */
+ if (atomic_load_relaxed (futex) == 0)
{
__libc_tend ();
}
else
- lll_unlock ((*futex), private);
+ {
+ /* Update the adapt_count while unlocking, before completing the critical
+ section. adapt_count is accessed concurrently outside of a
+ transaction or an acquired lock, e.g. in elision-lock.c, so we need to
+ use atomic accesses. However, the value of adapt_count is just a hint,
+ so relaxed MO accesses are sufficient.
+ If adapt_count were decremented while locking, multiple CPUs trying to
+ lock the locked mutex would decrement adapt_count to zero and another
+ CPU would try to start a transaction, which would be immediately
+ aborted as the mutex is locked.
+ If adapt_count were decremented while unlocking after completing the
+ critical section, possible waiters would be woken up before the
+ decrement. Those woken waiters could have destroyed and freed this
+ mutex already! */
+ short adapt_count_val = atomic_load_relaxed (adapt_count);
+ if (adapt_count_val > 0)
+ atomic_store_relaxed (adapt_count, adapt_count_val - 1);
+
+ lll_unlock ((*futex), private);
+ }
return 0;
}
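The ordering argument in the new comment can be made concrete: once
lll_unlock has woken a waiter, that waiter may legitimately run
pthread_mutex_destroy and free the mutex, so any later store to adapt_count
is a potential use-after-free.  A hedged sketch of the safe order versus
the broken one, with toy_unlock as a stand-in for lll_unlock:

  #include <stdatomic.h>

  extern void toy_unlock (atomic_int *futex);  /* Stand-in for lll_unlock.  */

  /* Safe (what the patch does): decrement while still inside the
     critical section; the mutex cannot be destroyed yet.  */
  void
  unlock_decrement_first (atomic_int *futex, atomic_short *adapt_count)
  {
    short val = atomic_load_explicit (adapt_count, memory_order_relaxed);
    if (val > 0)
      atomic_store_explicit (adapt_count, val - 1, memory_order_relaxed);
    toy_unlock (futex);   /* Waiters may free the mutex after this.  */
  }

  /* Broken: after toy_unlock returns, a woken waiter may already have
     destroyed and freed the mutex; this store is a use-after-free.  */
  void
  unlock_decrement_last (atomic_int *futex, atomic_short *adapt_count)
  {
    toy_unlock (futex);
    atomic_fetch_sub_explicit (adapt_count, 1, memory_order_relaxed); /* BUG */
  }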
extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
attribute_hidden;
-extern int __lll_unlock_elision(int *futex, int private)
+extern int __lll_unlock_elision(int *futex, short *adapt_count, int private)
attribute_hidden;
extern int __lll_trylock_elision(int *futex, short *adapt_count)
# define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
# define lll_unlock_elision(futex, adapt_count, private) \
- __lll_unlock_elision (&(futex), private)
+ __lll_unlock_elision (&(futex), &(adapt_count), private)
# define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision(&(futex), &(adapt_count))
# endif /* ENABLE_LOCK_ELISION */
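A hedged usage sketch of the updated macros, assuming lowlevellock.h is in
scope: the struct below is an illustrative stand-in for the real pthread
mutex internals (field names are made up), showing only that both macros now
take the same futex and adapt_count lvalues; LLL_PRIVATE is the usual
lowlevellock private-futex constant.

  struct toy_mutex
  {
    int lock;        /* The futex word.  */
    short elision;   /* The adapt_count hint.  */
  };

  static void
  toy_critical_section (struct toy_mutex *m)
  {
    lll_lock_elision (m->lock, m->elision, LLL_PRIVATE);
    /* ... critical section, possibly running as a transaction ...  */
    lll_unlock_elision (m->lock, m->elision, LLL_PRIVATE);
  }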