2 * Copyright (c) 2003 Hewlett-Packard Development Company, L.P.
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 /* We define various atomic operations on memory in a */
31 /* machine-specific way. Unfortunately, this is complicated */
32 /* by the fact that these may or may not be combined with */
33 /* various memory barriers. Thus the actual operations we */
34 /* define have the form AO_<atomic-op>_<barrier>, for all */
35 /* plausible combinations of <atomic-op> and <barrier>. */
36 /* This of course results in a mild combinatorial explosion. */
37 /* To deal with it, we try to generate derived */
38 /* definitions for as many of the combinations as we can, as */
39 /* automatically as possible. */
41 /* Our assumption throughout is that the programmer will */
42 /* specify the least demanding operation and memory barrier */
43 /* that will guarantee correctness for the implementation. */
44 /* Our job is to find the least expensive way to implement it */
45 /* on the applicable hardware. In many cases that will */
46 /* involve, for example, a stronger memory barrier, or a */
47 /* combination of hardware primitives. */
50 /* "plain" atomic operations are not guaranteed to include */
51 /* a barrier. The suffix in the name specifies the barrier */
52 /* type. Suffixes are: */
53 /* _release: Earlier operations may not be delayed past it. */
54 /* _acquire: Later operations may not move ahead of it. */
55 /* _read: Subsequent reads must follow this operation and */
56 /* preceding reads. */
57 /* _write: Earlier writes precede both this operation and */
59 /* _full: Ordered with respect to both earlier and later memops.*/
60 /* _release_write: Ordered with respect to earlier writes. */
61 /* _acquire_read: Ordered with respect to later reads. */
63 /* Currently we try to define the following atomic memory */
64 /* operations, in combination with the above barriers: */
68 /* AO_test_and_set (binary) */
69 /* AO_fetch_and_add */
70 /* AO_fetch_and_add1 */
71 /* AO_fetch_and_sub1 */
73 /* AO_compare_and_swap */
75 /* Note that atomicity guarantees are valid only if both */
76 /* readers and writers use AO_ operations to access the */
77 /* shared value, while ordering constraints are intended to */
78 /* apply to all memory operations. If a location can potentially */
79 /* be accessed simultaneously from multiple threads, and one of */
80 /* those accesses may be a write access, then all such */
81 /* accesses to that location should be through AO_ primitives. */
82 /* However if AO_ operations enforce sufficient ordering to */
83 /* ensure that a location x cannot be accessed concurrently, */
84 /* or can only be read concurrently, then x can be accessed */
85 /* via ordinary references and assignments. */
87 /* Compare_and_swap takes an address and an expected old */
88 /* value and a new value, and returns an int. Nonzero */
89 /* indicates that it succeeded. */
90 /* Test_and_set takes an address, atomically replaces it by */
91 /* AO_TS_SET, and returns the prior value. */
92 /* An AO_TS_t location can be reset with the */
93 /* AO_CLEAR macro, which normally uses AO_store_release. */
94 /* AO_fetch_and_add takes an address and an AO_t increment */
95 /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
96 /* are provided, since they allow faster implementations on */
97 /* some hardware. AO_or atomically ors an AO_t value into a */
98 /* memory location, but does not provide access to the original.*/
100 /* We expect this list to grow slowly over time. */
102 /* Note that AO_nop_full is a full memory barrier. */
104 /* Note that if some data is initialized with */
105 /* data.x = ...; data.y = ...; ... */
106 /* AO_store_release_write(&data_is_initialized, 1) */
107 /* then data is guaranteed to be initialized after the test */
108 /* if (AO_load_acquire_read(&data_is_initialized)) ... */
109 /* succeeds. Furthermore, this should generate near-optimal */
110 /* code on all common platforms. */
112 /* All operations operate on unsigned AO_t, which */
113 /* is the natural word size, and usually unsigned long. */
114 /* It is possible to check whether a particular operation op */
115 /* is available on a particular platform by checking whether */
116 /* AO_HAVE_op is defined. We make heavy use of these macros */
119 /* The rest of this file basically has three sections: */
121 /* Some utility and default definitions. */
123 /* The architecture dependent section: */
124 /* This defines atomic operations that have direct hardware */
125 /* support on a particular platform, mostly by including the */
126 /* appropriate compiler- and hardware-dependent file. */
128 /* The synthesis section: */
129 /* This tries to define other atomic operations in terms of */
130 /* those that are explicitly available on the platform. */
131 /* This section is hardware independent. */
132 /* We make no attempt to synthesize operations in ways that */
133 /* effectively introduce locks, except for the debugging/demo */
134 /* pthread-based implementation at the beginning. A more */
135 /* realistic implementation that falls back to locks could be */
136 /* added as a higher layer. But that would sacrifice */
137 /* usability from signal handlers. */
138 /* The synthesis section is implemented almost entirely in */
139 /* atomic_ops_generalize.h. */
141 /* Some common defaults. Overridden for some architectures. */
144 /* The test_and_set primitive returns an AO_TS_VAL_t value. */
145 /* AO_TS_t is the type of an in-memory test-and-set location. */
/* Initializer value for a test-and-set location (per the macro name): */
/* AO_TS_CLEAR cast to AO_t. The whole expansion is parenthesized so */
/* that it remains a single operand wherever the macro is used. */
147 #define AO_TS_INITIALIZER ((AO_t)AO_TS_CLEAR)
149 /* Platform-dependent stuff: */
/* AO_INLINE is the declaration prefix for small inline helpers. */
/* With GCC, MSVC or the Intel compiler it is "static __inline". */
/* The second definition is plain "static"; presumably it is the */
/* fallback branch for other compilers (the #else is not visible */
/* in this excerpt - confirm). */
150 #if defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER)
151 # define AO_INLINE static __inline
153 # define AO_INLINE static
/* AO_compiler_barrier() is defined once per supported compiler below. */
/* Judging by the comments on the individual variants, it is meant to */
/* constrain the compiler only, without emitting a hardware fence. */
156 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
/* GCC: an empty asm statement with a "memory" clobber. */
157 # define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
158 #elif defined(_MSC_VER)
159 # if defined(_AMD64_)
160 # pragma intrinsic(_ReadWriteBarrier)
161 # define AO_compiler_barrier() _ReadWriteBarrier()
162 /* We assume this does not generate a fence instruction. */
163 /* The documentation is a bit unclear. */
/* Other MSVC targets: an empty inline-asm block. */
165 # define AO_compiler_barrier() __asm { }
166 /* The preceding implementation may be preferable here too. */
167 /* But the documentation warns about VC++ 2003 and earlier. */
169 #elif defined(__INTEL_COMPILER)
170 # define AO_compiler_barrier() __memory_barrier() /* Too strong? IA64-only? */
171 #elif defined(_HPUX_SOURCE)
173 # include <machine/sys/inline.h>
174 # define AO_compiler_barrier() _Asm_sched_fence()
176 /* FIXME - We don't know how to do this. This is a guess. */
177 /* And probably a bad one. */
/* The guess: a self-assignment of a volatile dummy variable. */
178 static volatile int AO_barrier_dummy;
179 # define AO_compiler_barrier() AO_barrier_dummy = AO_barrier_dummy
182 /* We conjecture that the following usually gives us the right */
183 /* semantics or an error. */
184 # define AO_compiler_barrier() asm("")
/* When AO_USE_PTHREAD_DEFS is defined, pull in the generic pthread-based */
/* definitions. The compiler-specific sections that follow test this */
/* macro, so most of them are skipped in that configuration. */
187 #if defined(AO_USE_PTHREAD_DEFS)
188 # include "atomic_ops/sysdeps/generic_pthread.h"
189 #endif /* AO_USE_PTHREAD_DEFS */
/* GCC (but not the Intel compiler, and not when the pthread-based */
/* definitions were requested): include the sysdeps header matching the */
/* target architecture macro. Some branches also define */
/* AO_GENERALIZE_TWICE (generalize.h is then included a second time */
/* further down) or AO_CAN_EMUL_CAS (permits falling back to emul_cas.h */
/* when AO_REQUIRE_CAS is set and no compare-and-swap is available). */
191 #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
192 && !defined(__INTEL_COMPILER)
193 # if defined(__i386__)
194 # include "atomic_ops/sysdeps/gcc/x86.h"
195 # endif /* __i386__ */
196 # if defined(__x86_64__)
197 # include "atomic_ops/sysdeps/gcc/x86_64.h"
198 # endif /* __x86_64__ */
199 # if defined(__ia64__)
200 # include "atomic_ops/sysdeps/gcc/ia64.h"
201 # define AO_GENERALIZE_TWICE
202 # endif /* __ia64__ */
203 # if defined(__hppa__)
204 # include "atomic_ops/sysdeps/gcc/hppa.h"
205 # define AO_CAN_EMUL_CAS
206 # endif /* __hppa__ */
207 # if defined(__alpha__)
208 # include "atomic_ops/sysdeps/gcc/alpha.h"
209 # define AO_GENERALIZE_TWICE
210 # endif /* __alpha__ */
211 # if defined(__s390__)
212 # include "atomic_ops/sysdeps/gcc/s390.h"
213 # endif /* __s390__ */
214 # if defined(__sparc__)
215 # include "atomic_ops/sysdeps/gcc/sparc.h"
216 # define AO_CAN_EMUL_CAS
217 # endif /* __sparc__ */
218 # if defined(__m68k__)
219 # include "atomic_ops/sysdeps/gcc/m68k.h"
220 # endif /* __m68k__ */
221 # if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
222 || defined(__powerpc64__) || defined(__ppc64__)
223 # include "atomic_ops/sysdeps/gcc/powerpc.h"
224 # endif /* __powerpc__ */
/* The AO_USE_PTHREAD_DEFS test below repeats the enclosing condition. */
225 # if defined(__arm__) && !defined(AO_USE_PTHREAD_DEFS)
226 # include "atomic_ops/sysdeps/gcc/arm.h"
227 # define AO_CAN_EMUL_CAS
228 # endif /* __arm__ */
229 # if defined(__cris__) || defined(CRIS)
230 # include "atomic_ops/sysdeps/gcc/cris.h"
232 #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS && !__INTEL_COMPILER */
/* Intel compiler without the pthread-based definitions: only an ia64 */
/* header is selected here, and generalize.h is requested twice. */
234 #if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
235 # if defined(__ia64__)
236 # include "atomic_ops/sysdeps/icc/ia64.h"
237 # define AO_GENERALIZE_TWICE
/* HP-UX with a non-GCC compiler and without the pthread-based */
/* definitions: selects either the ia64 header (with a second */
/* generalize.h pass) or the hppa header (with CAS emulation allowed). */
/* NOTE(review): the inner architecture test separating the two branches */
/* is not visible in this excerpt. */
241 #if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
243 # include "atomic_ops/sysdeps/hpc/ia64.h"
244 # define AO_GENERALIZE_TWICE
246 # include "atomic_ops/sysdeps/hpc/hppa.h"
247 # define AO_CAN_EMUL_CAS
/* SPARC target with a non-GCC compiler (detected via the sparc or */
/* __sparc macros) and without the pthread-based definitions: use the */
/* sunc header and allow CAS emulation. */
251 #if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
252 && !defined(AO_USE_PTHREAD_DEFS)
253 # include "atomic_ops/sysdeps/sunc/sparc.h"
254 # define AO_CAN_EMUL_CAS
/* Microsoft compiler: x86_64 header when _AMD64_ is defined, otherwise */
/* the x86 header when _M_IX86 is at least 400. Unlike the sections */
/* above, this one does not test AO_USE_PTHREAD_DEFS. */
257 #if defined(_MSC_VER)
258 # if defined(_AMD64_)
259 # include "atomic_ops/sysdeps/msftc/x86_64.h"
260 # elif _M_IX86 >= 400
261 # include "atomic_ops/sysdeps/msftc/x86.h"
/* If the client defined AO_REQUIRE_CAS and none of the plain, _full or */
/* _acquire compare-and-swap variants has been provided so far, fall */
/* back to the emulation in emul_cas.h when the platform section set */
/* AO_CAN_EMUL_CAS. The #error line is presumably the alternative */
/* branch (its #else is not visible in this excerpt). */
265 #if defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
266 && !defined(AO_HAVE_compare_and_swap_full) \
267 && !defined(AO_HAVE_compare_and_swap_acquire)
268 # if defined(AO_CAN_EMUL_CAS)
269 # include "atomic_ops/sysdeps/emul_cas.h"
271 # error Cannot implement AO_compare_and_swap_full on this architecture.
273 #endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
275 /* The most common way to clear a test-and-set location */
276 /* at the end of a critical section. */
/* AO_CLEAR(addr) stores AO_TS_CLEAR to addr with a release store, */
/* unless AO_CLEAR was already defined earlier. The store primitive */
/* is chosen by AO_AO_TS_T / AO_CHAR_TS_T. The argument is */
/* parenthesized before the cast so that an expression argument such */
/* as "p + 1" is converted as a whole rather than only its first */
/* operand. */
277 #if AO_AO_TS_T && !defined(AO_CLEAR)
278 # define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
280 #if AO_CHAR_TS_T && !defined(AO_CLEAR)
281 # define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
285 * The generalization section.
286 * Theoretically this should repeatedly include atomic_ops_generalize.h.
287 * In fact, we observe that this converges after a small fixed number
288 * of iterations, usually one.
/* Run the generalization header once, and a second time when the */
/* selected platform section defined AO_GENERALIZE_TWICE. */
290 #include "atomic_ops/generalize.h"
291 #ifdef AO_GENERALIZE_TWICE
292 # include "atomic_ops/generalize.h"
295 /* For compatibility with version 0.4 and earlier */
/* Old upper-case spellings mapped onto the current type names. */
296 #define AO_TS_T AO_TS_t
298 #define AO_TS_VAL AO_TS_VAL_t
300 #endif /* ATOMIC_OPS_H */