2 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
3 * Original Author: Hans Boehm
5 * This file may be redistributed and/or modified under the
6 * terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 2, or (at your option) any later version.
9 * It is distributed in the hope that it will be useful, but WITHOUT ANY
10 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License in the
12 * file doc/COPYING for more details.
15 #if defined(HAVE_CONFIG_H)
22 #define AO_REQUIRE_CAS
23 #include "atomic_ops_stack.h"
25 #if defined(_MSC_VER) \
26 || defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__)
27 /* AO_pause not defined elsewhere */
28 /* FIXME: At least AO_spin should be factored out. */
33 /* Spin for 2**n units. */
/* NOTE(review): interior lines of AO_spin/AO_pause are elided in this    */
/* chunk; the fragments below show only the sampled lines.                */
/* AO_spin busy-waits for roughly 2**n iterations of dummy atomic work;   */
/* it is the cheap first stage of exponential back-off under contention.  */
34 static void AO_spin(int n)
37 AO_T j = AO_load(&dummy);
/* 2 << n iterations, i.e. 2**(n+1) loads/stores of the dummy location.   */
39 for (i = 0; i < (2 << n); ++i)
/* AO_pause fragment: after spinning fails, fall back to a short          */
/* async-signal-safe sleep whose duration grows with the retry count n,   */
/* capped at 100 ms once n exceeds 18.                                    */
55 /* Short async-signal-safe sleep. */
56 msecs = (n > 18? 100 : (1 << (n - 12)));
63 /* AO_pause is available elsewhere */
65 extern void AO_pause(int);
69 #ifdef AO_USE_ALMOST_LOCK_FREE
71 /* LIFO linked lists based on compare-and-swap. We need to avoid */
72 /* the case of a node deletion and reinsertion while I'm deleting */
73 /* it, since that may cause my CAS to succeed even though the next */
74 /* pointer is now wrong. Our solution is not fully lock-free, but it */
75 /* is good enough for signal handlers, provided we have a suitably low */
76 /* bound on the number of recursive signal handler reentries. */
77 /* A list consists of a first pointer and a blacklist */
78 /* of pointer values that are currently being removed. No list element */
79 /* on the blacklist may be inserted. If we would otherwise do so, we */
80 /* are allowed to insert a variant that differs only in the least */
81 /* significant, ignored, bits. If the list is full, we wait. */
83 /* Crucial observation: A particular padded pointer x (i.e. pointer */
84 /* plus arbitrary low order bits) can never be newly inserted into */
85 /* a list while it's in the corresponding auxiliary data structure. */
87 /* The second argument is a pointer to the link field of the element */
89 /* Both list headers and link fields contain "perturbed" pointers, i.e. */
90 /* pointers with extra bits "or"ed into the low order bits. */
/* Push *x onto the almost-lock-free LIFO list, using auxiliary          */
/* structure a's blacklist to avoid the ABA problem described above.     */
/* If x's padded-pointer value is currently on the blacklist (i.e. a     */
/* concurrent pop is removing it), we perturb the ignored low-order      */
/* bits of x_bits and retry, so the eventual CAS cannot be confused      */
/* with the in-flight removal.                                           */
/* NOTE(review): this chunk is elided; declarations, braces, and the     */
/* retry/goto structure between the sampled lines are missing here.      */
92 AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
96 AO_t x_bits = (AO_t)x;
99 /* No deletions of x can start here, since x is not currently in the */
/* Fast path: a blacklist of size 2 is checked with two explicit loads  */
/* rather than the general loop below.                                  */
104 /* Start all loads as close to concurrently as possible. */
105 AO_t entry1 = AO_load(a -> AO_stack_bl);
106 AO_t entry2 = AO_load(a -> AO_stack_bl + 1);
107 if (entry1 == x_bits || entry2 == x_bits)
109 /* Entry is currently being removed. Change it a little. */
/* Perturbing wrapped the low "version" bits back to zero — presumably  */
/* handled by reloading/retrying in the elided code; TODO confirm.      */
111 if ((x_bits & AO_BIT_MASK) == 0)
112 /* Version count overflowed; */
113 /* EXTREMELY unlikely, but possible. */
/* General path: scan the whole blacklist for x_bits.                   */
119 for (i = 0; i < AO_BL_SIZE; ++i)
121 if (AO_load(a -> AO_stack_bl + i) == x_bits)
123 /* Entry is currently being removed. Change it a little. */
125 if ((x_bits & AO_BIT_MASK) == 0)
126 /* Version count overflowed; */
127 /* EXTREMELY unlikely, but possible. */
133 /* x_bits is not currently being deleted */
/* Standard Treiber-style push: link x to the current head, then CAS    */
/* the head from next to x_bits with release ordering, retrying on      */
/* contention.                                                          */
136 next = AO_load(list);
139 while(!AO_compare_and_swap_release(list, next, x_bits));
143 * I concluded experimentally that checking a value first before
144 * performing a compare-and-swap is usually beneficial on X86, but
145 * slows things down appreciably with contention on Itanium.
146 * Since the Itanium behavior makes more sense to me (more cache line
147 * movement unless we're mostly reading, but back-off should guard
148 * against that), we take Itanium as the default. Measurements on
149 * other multiprocessor architectures would be useful. (On a uniprocessor,
150 * the initial check is almost certainly a very small loss.) - HB
153 # define PRECHECK(a) (a) == 0 &&
/* Pop and return the first element of the almost-lock-free list, or 0  */
/* if the list is empty. Before unlinking, the head value is published  */
/* on a's blacklist so that no concurrent push can reinsert the same    */
/* padded pointer while we hold it — this is what makes the head CAS    */
/* below immune to the ABA problem.                                     */
/* NOTE(review): chunk is elided; loop structure, retry labels, and     */
/* back-off calls between the sampled lines are missing here.           */
159 AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux * a)
168 first = AO_load(list);
169 if (0 == first) return 0;
170 /* Insert first into aux black list. */
171 /* This may spin if more than AO_BL_SIZE removals using auxiliary */
172 /* structure a are currently in progress. */
/* PRECHECK optionally reads the slot before CASing it from 0 to first */
/* (see the #ifdef discussion above for the X86-vs-Itanium trade-off). */
175 if (PRECHECK(a -> AO_stack_bl[i])
176 AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
/* All blacklist slots busy: presumably back off and retry from the    */
/* top in the elided code — TODO confirm.                              */
179 if ( i >= AO_BL_SIZE )
185 assert(i >= 0 && i < AO_BL_SIZE);
186 assert(a -> AO_stack_bl[i] == first);
187 /* First is on the auxiliary black list. It may be removed by */
188 /* another thread before we get to it, but a new insertion of x */
189 /* cannot be started here. */
190 /* Only we can remove it from the black list. */
191 /* We need to make sure that first is still the first entry on the */
192 /* list. Otherwise it's possible that a reinsertion of it was */
193 /* already started before we added the black list entry. */
/* Head changed under us: release the blacklist slot and (in elided    */
/* code) retry.                                                        */
194 if (first != AO_load(list)) {
195 AO_store_release(a->AO_stack_bl+i, 0);
/* Unlink: read first's next pointer and CAS the head from first to    */
/* next. Failure means another thread popped first; undo and retry.    */
198 first_ptr = AO_REAL_NEXT_PTR(first);
199 next = AO_load(first_ptr);
200 if (!AO_compare_and_swap_release(list, first, next)) {
201 AO_store_release(a->AO_stack_bl+i, 0);
204 assert(*list != first);
205 /* Since we never insert an entry on the black list, this cannot have */
206 /* succeeded unless first remained on the list while we were running. */
207 /* Thus its next link cannot have changed out from under us, and we */
208 /* removed exactly one entry and preserved the rest of the list. */
209 /* Note that it is quite possible that an additional entry was */
210 /* inserted and removed while we were running; this is OK since the */
211 /* part of the list following first must have remained unchanged, and */
212 /* first must again have been at the head of the list when the */
213 /* compare_and_swap succeeded. */
/* Success: clear our blacklist slot so first may be reinserted later. */
214 AO_store_release(a->AO_stack_bl+i, 0);
218 #else /* ! USE_ALMOST_LOCK_FREE */
220 /* Better names for fields in AO_stack_t */
222 #define version AO_val1
224 #if defined(AO_HAVE_compare_double_and_swap_double)
/* Push element onto list (double-word-CAS build). Classic Treiber     */
/* push: link element to the current head, then CAS only the pointer   */
/* word of the head with release ordering, retrying on contention.     */
/* NOTE(review): chunk is elided; the do { ... } opening and the link  */
/* store between the sampled lines are missing here.                   */
226 void AO_stack_push_release(AO_stack_t *list, AO_t *element)
231 next = AO_load(&(list -> ptr));
233 } while (!AO_compare_and_swap_release
234 ( &(list -> ptr), next, (AO_t) element));
235 /* This uses a narrow CAS here, an old optimization suggested */
236 /* by Treiber. Pop is still safe, since we run into the ABA */
237 /* problem only if there were both intervening "pop"s and "push"es. */
238 /* In that case we still see a change in the version number. */
/* Pop the first element of list, or return 0 if empty.                 */
/* Uses a double-width CAS on (version, ptr): the version is bumped on  */
/* every successful pop, so an ABA reuse of the same pointer value is   */
/* detected by the changed version word.                                */
/* NOTE(review): chunk is elided; the do { ... } opening and the load   */
/* of next from cptr are missing here.                                  */
241 AO_t *AO_stack_pop_acquire(AO_stack_t *list)
248 /* Version must be loaded first. */
/* Acquire on the version load orders it before the ptr load below.     */
249 cversion = AO_load_acquire(&(list -> version));
250 cptr = (AO_t *)AO_load(&(list -> ptr));
251 if (cptr == 0) return 0;
253 } while (!AO_compare_double_and_swap_double_release
254 (list, cversion, (AO_t) cptr, cversion+1, (AO_t) next));
259 #elif defined(AO_HAVE_compare_and_swap_double)
261 /* Needed for future IA64 processors. No current clients? */
263 #error Untested! Probably doesnt work.
265 /* We have a wide CAS, but only does an AO_t-wide comparison. */
266 /* We can't use the Treiber optimization, since we only check */
267 /* for an unchanged version number, not an unchanged pointer. */
/* Push for the wide-CAS-but-single-comparison build (guarded above by  */
/* #error: untested). Because the CAS compares only the version word,   */
/* the Treiber narrow-CAS optimization is unsafe here, so the push      */
/* itself must bump the version atomically with the pointer swap.       */
/* NOTE(review): chunk is elided; declarations, the do { } opening, the */
/* link store, and part of the CAS argument list are missing here.      */
268 void AO_stack_push_release(AO_stack_t *list, AO_t *element)
274 /* Again version must be loaded first, for different reason. */
275 version = AO_load_acquire(&(list -> version));
276 next_ptr = AO_load(&(list -> ptr));
278 } while (!AO_compare_and_swap_double_release(
280 version+1, (AO_t) element));
/* Pop for the wide-CAS build (guarded above by #error: untested).      */
/* Same version-counter ABA defense as the other pop variant.           */
/* NOTE(review): chunk is elided; the do { ... } opening and the load   */
/* of next are missing here.                                            */
283 AO_t *AO_stack_pop_acquire(AO_stack_t *list)
/* Acquire on the version load orders it before the ptr load below.     */
290 cversion = AO_load_acquire(&(list -> version));
291 cptr = (AO_t *)AO_load(&(list -> ptr));
292 if (cptr == 0) return 0;
294 } while (!AO_compare_double_and_swap_double_release
295 (list, cversion, (AO_t) cptr, cversion+1, next));
300 #endif /* AO_HAVE_compare_and_swap_double */
302 #endif /* ! USE_ALMOST_LOCK_FREE */