/*
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2018 Ivan Maidanski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef AO_ATOMIC_OPS_H
#define AO_ATOMIC_OPS_H

#include "atomic_ops/ao_version.h"
                        /* Define version numbers here to allow         */
                        /* test on build machines for cross-builds.     */
34 /* We define various atomic operations on memory in a */
35 /* machine-specific way. Unfortunately, this is complicated */
36 /* by the fact that these may or may not be combined with */
37 /* various memory barriers. Thus the actual operations we */
38 /* define have the form AO_<atomic-op>_<barrier>, for all */
39 /* plausible combinations of <atomic-op> and <barrier>. */
40 /* This of course results in a mild combinatorial explosion. */
41 /* To deal with it, we try to generate derived */
42 /* definitions for as many of the combinations as we can, as */
43 /* automatically as possible. */
45 /* Our assumption throughout is that the programmer will */
46 /* specify the least demanding operation and memory barrier */
47 /* that will guarantee correctness for the implementation. */
48 /* Our job is to find the least expensive way to implement it */
49 /* on the applicable hardware. In many cases that will */
50 /* involve, for example, a stronger memory barrier, or a */
51 /* combination of hardware primitives. */
54 /* "plain" atomic operations are not guaranteed to include */
55 /* a barrier. The suffix in the name specifies the barrier */
56 /* type. Suffixes are: */
57 /* _release: Earlier operations may not be delayed past it. */
58 /* _acquire: Later operations may not move ahead of it. */
59 /* _read: Subsequent reads must follow this operation and */
60 /* preceding reads. */
/* _write: Earlier writes precede both this operation and       */
/*         later writes.                                        */
/* _full: Ordered with respect to both earlier and later memory */
/*        operations.                                           */
65 /* _release_write: Ordered with respect to earlier writes. */
66 /* _acquire_read: Ordered with respect to later reads. */
68 /* Currently we try to define the following atomic memory */
69 /* operations, in combination with the above barriers: */
/* AO_nop                                                       */
/* AO_load                                                      */
/* AO_store                                                     */
/* AO_test_and_set (binary)                                     */
/* AO_fetch_and_add                                             */
/* AO_fetch_and_add1                                            */
/* AO_fetch_and_sub1                                            */
/* AO_and                                                       */
/* AO_or                                                        */
/* AO_xor                                                       */
/* AO_compare_and_swap                                          */
/* AO_fetch_compare_and_swap                                    */
83 /* Note that atomicity guarantees are valid only if both */
84 /* readers and writers use AO_ operations to access the */
85 /* shared value, while ordering constraints are intended to */
86 /* apply all memory operations. If a location can potentially */
87 /* be accessed simultaneously from multiple threads, and one of */
88 /* those accesses may be a write access, then all such */
89 /* accesses to that location should be through AO_ primitives. */
90 /* However if AO_ operations enforce sufficient ordering to */
91 /* ensure that a location x cannot be accessed concurrently, */
92 /* or can only be read concurrently, then x can be accessed */
93 /* via ordinary references and assignments. */
95 /* AO_compare_and_swap takes an address and an expected old */
96 /* value and a new value, and returns an int. Non-zero result */
97 /* indicates that it succeeded. */
98 /* AO_fetch_compare_and_swap takes an address and an expected */
99 /* old value and a new value, and returns the real old value. */
100 /* The operation succeeded if and only if the expected old */
101 /* value matches the old value returned. */
103 /* Test_and_set takes an address, atomically replaces it by */
104 /* AO_TS_SET, and returns the prior value. */
105 /* An AO_TS_t location can be reset with the */
106 /* AO_CLEAR macro, which normally uses AO_store_release. */
107 /* AO_fetch_and_add takes an address and an AO_t increment */
108 /* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
109 /* are provided, since they allow faster implementations on */
110 /* some hardware. AO_and, AO_or, AO_xor do atomically and, or, */
111 /* xor (respectively) an AO_t value into a memory location, */
112 /* but do not provide access to the original. */
114 /* We expect this list to grow slowly over time. */
116 /* Note that AO_nop_full is a full memory barrier. */
118 /* Note that if some data is initialized with */
119 /* data.x = ...; data.y = ...; ... */
120 /* AO_store_release_write(&data_is_initialized, 1) */
121 /* then data is guaranteed to be initialized after the test */
122 /* if (AO_load_acquire_read(&data_is_initialized)) ... */
123 /* succeeds. Furthermore, this should generate near-optimal */
124 /* code on all common platforms. */
126 /* All operations operate on unsigned AO_t, which */
127 /* is the natural word size, and usually unsigned long. */
128 /* It is possible to check whether a particular operation op */
129 /* is available on a particular platform by checking whether */
/* AO_HAVE_op is defined.  We make heavy use of these macros    */
/* internally.                                                  */
133 /* The rest of this file basically has three sections: */
135 /* Some utility and default definitions. */
137 /* The architecture dependent section: */
138 /* This defines atomic operations that have direct hardware */
139 /* support on a particular platform, mostly by including the */
140 /* appropriate compiler- and hardware-dependent file. */
142 /* The synthesis section: */
143 /* This tries to define other atomic operations in terms of */
144 /* those that are explicitly available on the platform. */
145 /* This section is hardware independent. */
146 /* We make no attempt to synthesize operations in ways that */
147 /* effectively introduce locks, except for the debugging/demo */
148 /* pthread-based implementation at the beginning. A more */
149 /* realistic implementation that falls back to locks could be */
150 /* added as a higher layer. But that would sacrifice */
151 /* usability from signal handlers. */
152 /* The synthesis section is implemented almost entirely in */
153 /* atomic_ops/generalize.h. */
/* Some common defaults.  Overridden for some architectures.    */

/* The test_and_set primitive returns an AO_TS_VAL_t value.     */
/* AO_TS_t is the type of an in-memory test-and-set location.   */
/* Note: AO_TS_t and AO_TS_CLEAR are supplied by the            */
/* platform-specific files; expansion is deferred until use.    */
#define AO_TS_INITIALIZER ((AO_TS_t)AO_TS_CLEAR)
/* Convenient internal macro to test the version of GCC:        */
/* evaluates to non-zero iff the compiler is GCC (or a          */
/* GCC-compatible one defining __GNUC__/__GNUC_MINOR__) of the  */
/* given major.minor version or newer.                          */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define AO_GNUC_PREREQ(major, minor) \
            ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((major) << 16) + (minor))
#else
# define AO_GNUC_PREREQ(major, minor) 0 /* false */
#endif
/* Convenient internal macro to test the version of Clang:      */
/* evaluates to non-zero iff the compiler is Clang of the given */
/* major.minor version or newer.                                */
#if defined(__clang__) && defined(__clang_major__)
# define AO_CLANG_PREREQ(major, minor) \
    ((__clang_major__ << 16) + __clang_minor__ >= ((major) << 16) + (minor))
#else
# define AO_CLANG_PREREQ(major, minor) 0 /* false */
#endif
/* Platform-dependent stuff:                                    */
/* AO_INLINE is the qualifier used for the small operation      */
/* definitions below; falls back to plain "static" where no     */
/* inline keyword is known to be supported.                     */
#if (defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
     || defined(__DMC__) || defined(__WATCOMC__)) && !defined(AO_NO_INLINE)
# define AO_INLINE static __inline
#elif defined(__sun) && !defined(AO_NO_INLINE)
# define AO_INLINE static inline
#else
# define AO_INLINE static
#endif
189 #if AO_GNUC_PREREQ(3, 0) && !defined(LINT2)
190 # define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
191 /* Equivalent to (expr) but predict that usually (expr) == 0. */
193 # define AO_EXPECT_FALSE(expr) (expr)
194 #endif /* !__GNUC__ */
/* Detect the sanitizers enabled for this compilation unit.     */
#if defined(__has_feature)
  /* __has_feature() is supported (Clang).      */
# if __has_feature(address_sanitizer)
#   define AO_ADDRESS_SANITIZER
# endif
# if __has_feature(memory_sanitizer)
#   define AO_MEMORY_SANITIZER
# endif
# if __has_feature(thread_sanitizer)
#   define AO_THREAD_SANITIZER
# endif
#else
# ifdef __SANITIZE_ADDRESS__
    /* GCC-style predefined macro.      */
#   define AO_ADDRESS_SANITIZER
# endif
#endif /* !__has_feature */
/* Attribute to suppress MemorySanitizer instrumentation of a   */
/* function; expands to nothing when MSan is not in use.        */
#ifndef AO_ATTR_NO_SANITIZE_MEMORY
# ifndef AO_MEMORY_SANITIZER
#   define AO_ATTR_NO_SANITIZE_MEMORY /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
# else
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_MEMORY */
/* Attribute to suppress ThreadSanitizer instrumentation of a   */
/* function; expands to nothing when TSan is not in use.        */
#ifndef AO_ATTR_NO_SANITIZE_THREAD
# ifndef AO_THREAD_SANITIZER
#   define AO_ATTR_NO_SANITIZE_THREAD /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize("thread")))
# else
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_THREAD */
/* AO_compiler_barrier() prevents the compiler from reordering  */
/* memory accesses across it; it is not a hardware fence.       */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
#elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
      || defined(__WATCOMC__)
# if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
#   if defined(_WIN32_WCE)
/* #     include <cmnintrin.h> */
#   elif defined(_MSC_VER)
#     include <intrin.h>
#   endif
#   pragma intrinsic(_ReadWriteBarrier)
#   define AO_compiler_barrier() _ReadWriteBarrier()
        /* We assume this does not generate a fence instruction.        */
        /* The documentation is a bit unclear.                          */
# else
#   define AO_compiler_barrier() __asm { }
        /* The preceding implementation may be preferable here too.     */
        /* But the documentation warns about VC++ 2003 and earlier.     */
# endif
#elif defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __memory_barrier()
                                        /* FIXME: Too strong? IA64-only? */
#elif defined(_HPUX_SOURCE)
# if defined(__ia64)
#   include <machine/sys/inline.h>
#   define AO_compiler_barrier() _Asm_sched_fence()
# else
    /* FIXME - We do not know how to do this.  This is a guess. */
    /* And probably a bad one.                                  */
    static volatile int AO_barrier_dummy;
#   define AO_compiler_barrier() (void)(AO_barrier_dummy = AO_barrier_dummy)
# endif
#else
  /* We conjecture that the following usually gives us the right        */
  /* semantics or an error.                                             */
# define AO_compiler_barrier() asm("")
#endif
/* Use the pthread-based implementation when explicitly requested       */
/* via -DAO_USE_PTHREAD_DEFS (see the "debugging/demo" note above).     */
#if defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/generic_pthread.h"
#endif /* AO_USE_PTHREAD_DEFS */
/* ARM compiler (armcc) not in GCC-compatible mode.     */
#if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/armcc/arm_v6.h"
# define AO_GENERALIZE_TWICE
#endif
282 #if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
283 && !defined(__INTEL_COMPILER)
284 # if defined(__i386__)
285 /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */
286 /* it might require specifying additional options (like -march) */
287 /* or additional link libraries (if -march is not specified). */
288 # include "atomic_ops/sysdeps/gcc/x86.h"
289 # elif defined(__x86_64__)
290 # if AO_GNUC_PREREQ(4, 2) && !defined(AO_USE_SYNC_CAS_BUILTIN)
291 /* It is safe to use __sync CAS built-in on this architecture. */
292 # define AO_USE_SYNC_CAS_BUILTIN
294 # include "atomic_ops/sysdeps/gcc/x86.h"
295 # elif defined(__ia64__)
296 # include "atomic_ops/sysdeps/gcc/ia64.h"
297 # define AO_GENERALIZE_TWICE
298 # elif defined(__hppa__)
299 # include "atomic_ops/sysdeps/gcc/hppa.h"
300 # define AO_CAN_EMUL_CAS
301 # elif defined(__alpha__)
302 # include "atomic_ops/sysdeps/gcc/alpha.h"
303 # define AO_GENERALIZE_TWICE
304 # elif defined(__s390__)
305 # include "atomic_ops/sysdeps/gcc/s390.h"
306 # elif defined(__sparc__)
307 # include "atomic_ops/sysdeps/gcc/sparc.h"
308 # define AO_CAN_EMUL_CAS
309 # elif defined(__m68k__)
310 # include "atomic_ops/sysdeps/gcc/m68k.h"
311 # elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
312 || defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC)
313 # include "atomic_ops/sysdeps/gcc/powerpc.h"
314 # elif defined(__aarch64__)
315 # include "atomic_ops/sysdeps/gcc/aarch64.h"
316 # define AO_CAN_EMUL_CAS
317 # elif defined(__arm__)
318 # include "atomic_ops/sysdeps/gcc/arm.h"
319 # define AO_CAN_EMUL_CAS
320 # elif defined(__cris__) || defined(CRIS)
321 # include "atomic_ops/sysdeps/gcc/cris.h"
322 # define AO_CAN_EMUL_CAS
323 # define AO_GENERALIZE_TWICE
324 # elif defined(__mips__)
325 # include "atomic_ops/sysdeps/gcc/mips.h"
326 # elif defined(__sh__) || defined(SH4)
327 # include "atomic_ops/sysdeps/gcc/sh.h"
328 # define AO_CAN_EMUL_CAS
329 # elif defined(__avr32__)
330 # include "atomic_ops/sysdeps/gcc/avr32.h"
331 # elif defined(__hexagon__)
332 # include "atomic_ops/sysdeps/gcc/hexagon.h"
333 # elif defined(__riscv)
334 # include "atomic_ops/sysdeps/gcc/riscv.h"
335 # elif defined(__tile__)
336 # include "atomic_ops/sysdeps/gcc/tile.h"
337 # else /* __nios2__, etc. */
338 # include "atomic_ops/sysdeps/gcc/generic.h"
340 #endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */
/* IBM XL C/C++ on PowerPC, not in GCC-compatible mode. */
#if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \
     || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \
     || defined(_ARCH_PWR)
#   include "atomic_ops/sysdeps/ibmc/powerpc.h"
#   define AO_GENERALIZE_TWICE
# endif
#endif
/* Intel compiler (icc): IA-64 natively, x86/x86-64 via the     */
/* GCC-compatible mode.                                         */
#if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64__)
#   include "atomic_ops/sysdeps/icc/ia64.h"
#   define AO_GENERALIZE_TWICE
# endif
# if defined(__GNUC__)
    /* Intel Compiler in GCC compatible mode */
#   if defined(__i386__)
#     include "atomic_ops/sysdeps/gcc/x86.h"
#   endif /* __i386__ */
#   if defined(__x86_64__)
#     if (__INTEL_COMPILER > 1110) && !defined(AO_USE_SYNC_CAS_BUILTIN)
#       define AO_USE_SYNC_CAS_BUILTIN
#     endif
#     include "atomic_ops/sysdeps/gcc/x86.h"
#   endif /* __x86_64__ */
# endif
#endif
/* HP-UX native compiler: IA-64 or PA-RISC.     */
#if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64)
#   include "atomic_ops/sysdeps/hpc/ia64.h"
#   define AO_GENERALIZE_TWICE
# else
#   include "atomic_ops/sysdeps/hpc/hppa.h"
#   define AO_CAN_EMUL_CAS
# endif
#endif
/* Microsoft-compatible compilers on Windows.   */
#if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
    || (defined(__WATCOMC__) && defined(__NT__))
# if defined(_AMD64_) || defined(_M_X64)
#   include "atomic_ops/sysdeps/msftc/x86_64.h"
# elif defined(_M_IX86) || defined(x86)
#   include "atomic_ops/sysdeps/msftc/x86.h"
# elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
#   include "atomic_ops/sysdeps/msftc/arm.h"
#   define AO_GENERALIZE_TWICE
# endif
#endif
#if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
  /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
# if defined(__i386) || defined(__x86_64) || defined(__amd64)
#   include "atomic_ops/sysdeps/sunc/x86.h"
# endif
#endif
/* Sun CC on SPARC.     */
#if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
    && !defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/sunc/sparc.h"
# define AO_CAN_EMUL_CAS
#endif
/* If the client requires CAS (-DAO_REQUIRE_CAS) but the platform       */
/* provides none, fall back to the blocking emulation where possible.   */
#if (defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
     && !defined(AO_HAVE_fetch_compare_and_swap) \
     && !defined(AO_HAVE_compare_and_swap_full) \
     && !defined(AO_HAVE_fetch_compare_and_swap_full) \
     && !defined(AO_HAVE_compare_and_swap_acquire) \
     && !defined(AO_HAVE_fetch_compare_and_swap_acquire)) || defined(CPPCHECK)
# if defined(AO_CAN_EMUL_CAS)
#   include "atomic_ops/sysdeps/emul_cas.h"
# elif !defined(CPPCHECK)
#   error Cannot implement AO_compare_and_swap_full on this architecture.
# endif
#endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
/* The most common way to clear a test-and-set location         */
/* at the end of a critical section.                            */
#if AO_AO_TS_T && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif
#if AO_CHAR_TS_T && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif
/* The generalization section.  */
/* If CAS can only be emulated, the emulated primitives become  */
/* visible after the first pass, so a second pass is needed.    */
#if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \
    && !defined(AO_HAVE_compare_and_swap_full) \
    && !defined(AO_HAVE_fetch_compare_and_swap_full)
# define AO_GENERALIZE_TWICE
#endif
437 /* Theoretically we should repeatedly include atomic_ops/generalize.h. */
438 /* In fact, we observe that this converges after a small fixed number */
439 /* of iterations, usually one. */
440 #include "atomic_ops/generalize.h"
442 #if !defined(AO_GENERALIZE_TWICE) \
443 && defined(AO_HAVE_compare_double_and_swap_double) \
444 && (!defined(AO_HAVE_double_load) || !defined(AO_HAVE_double_store))
445 # define AO_GENERALIZE_TWICE
#ifdef AO_T_IS_INT
  /* Included after the first generalization pass.      */
# include "atomic_ops/sysdeps/ao_t_is_int.h"
# ifndef AO_GENERALIZE_TWICE
    /* Always generalize again. */
#   define AO_GENERALIZE_TWICE
# endif
#endif /* AO_T_IS_INT */
/* Second generalization pass, if required (see above). */
#ifdef AO_GENERALIZE_TWICE
# include "atomic_ops/generalize.h"
#endif
/* For compatibility with version 0.4 and earlier.      */
#define AO_TS_T AO_TS_t
#define AO_TS_VAL AO_TS_VAL_t

#endif /* !AO_ATOMIC_OPS_H */