/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".
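
// Illustrative usage sketch (not part of this header): how a caller might
// pair the release/acquire operations defined below to publish data between
// threads.  The variable names are hypothetical; real clients would go
// through the wrappers in "base/atomicops.h" rather than calling these
// internal functions directly.
//
//   Atomic32 g_payload = 0;
//   Atomic32 g_ready   = 0;
//
//   // Producer thread:
//   NoBarrier_Store(&g_payload, 42);
//   Release_Store(&g_ready, 1);       // payload becomes visible before flag
//
//   // Consumer thread:
//   if (Acquire_Load(&g_ready) == 1) {
//     Atomic32 v = NoBarrier_Load(&g_payload);  // guaranteed to observe 42
//   }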

#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_

typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*

// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
  bool has_cmpxchg16b;       // Processor supports cmpxchg16b instruction.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
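
// ATOMICOPS_COMPILER_BARRIER emits no machine instruction; the empty asm with
// a "memory" clobber only prevents the compiler from reordering or caching
// memory accesses across it.  The hardware ordering relied on by the
// Release_Store()/Acquire_Load() pairs below comes from x86's strong ordering
// of ordinary loads and stores.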

namespace base {
namespace subtle {

typedef int64_t Atomic64;

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}
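
// Note on the caveat above: code that does use non-temporal stores (e.g.
// movntps) must issue its own "sfence" before publishing a flag with
// Release_Store(); the plain-store release in this file only orders ordinary
// write-back cacheable stores.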

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.

#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).
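//
// The implementation below addresses pitfall 1 by saving and restoring %ebx
// around the cmpxchg8b, and pitfall 2 by pinning every operand to a specific
// register via the "D", "S", and "A"/"0" constraints.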

inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)              // as contents of ptr:
                       : "D" (ptr),               //   ecx:ebx => ptr
                         "0" (old_value),         // else:
                         "S" (&new_value)         //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_val,
                                         Atomic64 new_val) {
  return __sync_val_compare_and_swap(ptr, old_val, new_val);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_val) {
  Atomic64 old_val;

  do {
    old_val = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 old_val, new_val;

  do {
    old_val = *ptr;
    new_val = old_val + increment;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(__x86_64__)

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_