+2001-05-01 Kaz Kylheku <kaz@ashi.footprints.net>
+
+ Memory barrier overhaul following line by line inspection.
+ * mutex.c (pthread_once): Missing memory barriers added.
+ * pthread.c (__pthread_wait_for_restart_signal,
+ __pthread_timedsuspend_new, __pthread_restart_new): Added
+ memory barriers ``just in case'' and for documentary value.
+ * spinlock.c (__pthread_release): New inline function for releasing
+ spinlock, to complement __pthread_acquire. Includes memory
+ barrier prior to assignment to spinlock, and __asm __volatile
+ dance to prevent reordering or optimization of the spinlock access.
+ * spinlock.c (__pthread_unlock, __pthread_alt_lock,
+ __pthread_alt_timedlock, __pthread_alt_unlock,
+ __pthread_compare_and_swap): Updated to use new __pthread_release
+ instead of updating spinlock directly.
+ * spinlock.c (__pthread_lock, __pthread_unlock, wait_node_alloc,
+ wait_node_free, wait_node_dequeue, __pthread_alt_lock,
+ __pthread_alt_timedlock, __pthread_alt_unlock, __pthread_acquire):
+ Memory barrier overhaul. Lots of missing memory barriers added,
+ a couple needless ones removed.
+ * spinlock.c (__pthread_compare_and_swap): testandset optimization
+ removed, just calls __pthread_acquire, which has the new read
+ barrier in it before its testandset.
+
2001-05-20 Roland McGrath <roland@frob.com>
* Makeconfig: New file, variables used to be in main libc Makeconfig.
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
static void __pthread_acquire(int * spinlock);
+
+static inline void __pthread_release(int * spinlock)
+{
+ WRITE_MEMORY_BARRIER();
+ *spinlock = __LT_SPINLOCK_INIT;
+ __asm __volatile ("" : "=m" (*spinlock) : "0" (*spinlock));
+}
#endif
{
if (spin_count)
lock->__spinlock += (spin_count - lock->__spinlock) / 8;
+ READ_MEMORY_BARRIER();
return;
}
}
/* Put back any resumes we caught that don't belong to us. */
while (spurious_wakeup_count--)
restart(self);
+
+ READ_MEMORY_BARRIER();
#endif
}
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
{
- WRITE_MEMORY_BARRIER();
- lock->__spinlock = __LT_SPINLOCK_INIT;
+ __pthread_release(&lock->__spinlock);
return 0;
}
#endif
#if defined HAS_COMPARE_AND_SWAP
+ WRITE_MEMORY_BARRIER();
+
again:
oldstatus = lock->__status;
thr = (pthread_descr) (oldstatus & ~1L);
maxprio = 0;
maxptr = ptr;
+
+ /* Before we iterate over the wait queue, we need to execute
+ a read barrier, otherwise we may read stale contents of nodes that may
+ just have been inserted by other processors. One read barrier is enough to
+ ensure we have a stable list; we don't need one for each pointer chase
+ through the list, because we are the owner of the lock; other threads
+ can only add nodes at the front; if a front node is consistent,
+ the ones behind it must also be. */
+
+ READ_MEMORY_BARRIER();
+
while (thr != 0) {
if (thr->p_priority >= maxprio) {
maxptr = ptr;
maxprio = thr->p_priority;
}
ptr = &(thr->p_nextlock);
- /* Prevent reordering of the load of lock->__status above and the
- load of *ptr below, as well as reordering of *ptr between
- several iterations of the while loop. Some processors (e.g.
- multiprocessor Alphas) could perform such reordering even though
- the loads are dependent. */
- READ_MEMORY_BARRIER();
thr = *ptr;
}
- /* Prevent reordering of the load of lock->__status above and
- thr->p_nextlock below */
- READ_MEMORY_BARRIER();
+
/* Remove max prio thread from waiting list. */
if (maxptr == (pthread_descr *) &lock->__status) {
/* If max prio thread is at head, remove it with compare-and-swap
thr = *maxptr;
*maxptr = thr->p_nextlock;
+ /* Ensure deletion from linked list completes before we
+ release the lock. */
+ WRITE_MEMORY_BARRIER();
+
do {
oldstatus = lock->__status;
} while (!__compare_and_swap_with_release_semantics(&lock->__status,
oldstatus, oldstatus & ~1L));
}
- /* Prevent reordering of store to *maxptr above and store to thr->p_nextlock
- below */
- WRITE_MEMORY_BARRIER();
- /* Wake up the selected waiting thread */
+
+ /* Wake up the selected waiting thread. Woken thread can check
+ its own p_nextlock field for NULL to detect that it has been removed. No
+ barrier is needed here, since restart() and suspend() take
+ care of memory synchronization. */
+
thr->p_nextlock = NULL;
restart(thr);
if (oldvalue == 0)
return malloc(sizeof *wait_node_alloc());
+ /* Ensure we don't read stale next link through oldvalue pointer. */
+ READ_MEMORY_BARRIER();
newvalue = (long) ((struct wait_node *) oldvalue)->next;
- WRITE_MEMORY_BARRIER();
} while (! __compare_and_swap(&wait_node_free_list, oldvalue, newvalue));
return (struct wait_node *) oldvalue;
oldvalue = wait_node_free_list;
wn->next = (struct wait_node *) oldvalue;
newvalue = (long) wn;
+ /* Ensure node contents are written before we swap it into the list. */
WRITE_MEMORY_BARRIER();
} while (! __compare_and_swap(&wait_node_free_list, oldvalue, newvalue));
#endif
Otherwise it can be deleted in the straightforward way. */
if (pp_node == pp_head) {
+ /* We don't need a read barrier between these next two loads,
+ because it is assumed that the caller has already ensured
+ the stability of *p_node with respect to p_node. */
+
long oldvalue = (long) p_node;
long newvalue = (long) p_node->next;
know the identity of the node which now holds the pointer to the node
being deleted, so we must search from the beginning. */
- for (pp_node = pp_head; *pp_node != p_node; pp_node = &(*pp_node)->next)
- ; /* null body */
+ for (pp_node = pp_head; p_node != *pp_node; ) {
+ pp_node = &(*pp_node)->next;
+ READ_MEMORY_BARRIER(); /* Stabilize *pp_node for next iteration. */
+ }
}
*pp_node = p_node->next;
suspend_needed = 1;
}
- WRITE_MEMORY_BARRIER();
- lock->__spinlock = __LT_SPINLOCK_INIT;
+ __pthread_release(&lock->__spinlock);
if (suspend_needed)
suspend (self);
if (oldstatus != 0)
suspend(self);
+
+ READ_MEMORY_BARRIER();
#endif
}
oldstatus = 1; /* force suspend */
}
- WRITE_MEMORY_BARRIER();
- lock->__spinlock = __LT_SPINLOCK_INIT;
+ __pthread_release(&lock->__spinlock);
goto suspend;
}
#endif
wait_node_free(p_wait_node);
+ READ_MEMORY_BARRIER();
+
return 1; /* Got the lock! */
}
struct wait_node ** const pp_head = (struct wait_node **) &lock->__status;
int maxprio;
+ WRITE_MEMORY_BARRIER();
+
#if defined TEST_FOR_COMPARE_AND_SWAP
if (!__pthread_has_cas)
#endif
p_max_prio = p_node = *pp_head;
maxprio = INT_MIN;
+ READ_MEMORY_BARRIER(); /* Prevent access to stale data through p_node */
+
while (p_node != (struct wait_node *) 1) {
int prio;
wait_node_dequeue(pp_head, pp_node, p_node);
#endif
wait_node_free(p_node);
- READ_MEMORY_BARRIER();
+ /* Note that the next assignment may take us to the beginning
+ of the queue, to newly inserted nodes, if pp_node == pp_head.
+ In that case we need a memory barrier to stabilize the first of
+ these new nodes. */
p_node = *pp_node;
+ if (pp_node == pp_head)
+ READ_MEMORY_BARRIER(); /* No stale reads through p_node */
continue;
} else if ((prio = p_node->thr->p_priority) >= maxprio) {
/* Otherwise remember it if its thread has a higher or equal priority
p_max_prio = p_node;
}
+ /* This canno6 jump backward in the list, so no further read
+ barrier is needed. */
pp_node = &p_node->next;
- READ_MEMORY_BARRIER();
p_node = *pp_node;
}
- READ_MEMORY_BARRIER();
-
/* If all threads abandoned, go back to top */
if (maxprio == INT_MIN)
continue;
#if defined HAS_COMPARE_AND_SWAP
wait_node_dequeue(pp_head, pp_max_prio, p_max_prio);
#endif
- WRITE_MEMORY_BARRIER();
restart(p_max_prio->thr);
break;
}
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
{
- WRITE_MEMORY_BARRIER();
- lock->__spinlock = __LT_SPINLOCK_INIT;
+ __pthread_release(&lock->__spinlock);
}
#endif
}
int * spinlock)
{
int res;
- if (testandset(spinlock)) __pthread_acquire(spinlock);
+
+ __pthread_acquire(spinlock);
+
if (*ptr == oldval) {
*ptr = newval; res = 1;
} else {
res = 0;
}
- /* Prevent reordering of store to *ptr above and store to *spinlock below */
- WRITE_MEMORY_BARRIER();
- *spinlock = 0;
+
+ __pthread_release(spinlock);
+
return res;
}
int cnt = 0;
struct timespec tm;
+ READ_MEMORY_BARRIER();
+
while (testandset(spinlock)) {
if (cnt < MAX_SPIN_COUNT) {
sched_yield();