src/google/protobuf/stubs/atomicops_internals_x86_gcc.h

   1 // Protocol Buffers - Google's data interchange format
   2 // Copyright 2012 Google Inc.  All rights reserved.
   3 // https://developers.google.com/protocol-buffers/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are
   7 // met:
   8 //
   9 //     * Redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer.
  11 //     * Redistributions in binary form must reproduce the above
  12 // copyright notice, this list of conditions and the following disclaimer
  13 // in the documentation and/or other materials provided with the
  14 // distribution.
  15 //     * Neither the name of Google Inc. nor the names of its
  16 // contributors may be used to endorse or promote products derived from
  17 // this software without specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 // This file is an internal atomic implementation, use atomicops.h instead.
  32
  33 #ifndef GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_
  34 #define GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_
  35
  36 namespace google {
  37 namespace protobuf {
  38 namespace internal {
  39
  // Internal record of the features of this x86 processor.  It is not part of
  // this module's public API, and clients may not use it.  The values may not
  // be correct before main() is run, but until then they are set
  // conservatively.
  struct AtomicOps_x86CPUFeatureStruct {
    // Processor has the AMD memory-barrier bug; do an lfence after an acquire
    // compare-and-swap.
    bool has_amd_lock_mb_bug;

    // Processor has SSE2.
    bool has_sse2;
  };

  // The single feature record consulted by the operations in this header; it
  // is only declared here.
  extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
  50
  // Compiler-level barrier: an empty asm statement (no instruction is emitted)
  // whose "memory" clobber tells the compiler that memory may have changed.
  #define ATOMICOPS_COMPILER_BARRIER() \
    __asm__ __volatile__("" : : : "memory")
  52
  53 // 32-bit low-level operations on any platform.
  54
  55 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
  56                                          Atomic32 old_value,
  57                                          Atomic32 new_value) {
  58   Atomic32 prev;
  59   __asm__ __volatile__("lock; cmpxchgl %1,%2"
  60                        : "=a" (prev)
  61                        : "q" (new_value), "m" (*ptr), "0" (old_value)
  62                        : "memory");
  63   return prev;
  64 }
  65
  66 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
  67                                          Atomic32 new_value) {
  68   __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
  69                        : "=r" (new_value)
  70                        : "m" (*ptr), "0" (new_value)
  71                        : "memory");
  72   return new_value;  // Now it's the previous value.
  73 }
  74
  75 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
  76                                           Atomic32 increment) {
  77   Atomic32 temp = increment;
  78   __asm__ __volatile__("lock; xaddl %0,%1"
  79                        : "+r" (temp), "+m" (*ptr)
  80                        : : "memory");
  81   // temp now holds the old value of *ptr
  82   return temp + increment;
  83 }
  84
  85 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
  86                                         Atomic32 increment) {
  87   Atomic32 temp = increment;
  88   __asm__ __volatile__("lock; xaddl %0,%1"
  89                        : "+r" (temp), "+m" (*ptr)
  90                        : : "memory");
  91   // temp now holds the old value of *ptr
  92   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
  93     __asm__ __volatile__("lfence" : : : "memory");
  94   }
  95   return temp + increment;
  96 }
  97
  98 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
  99                                        Atomic32 old_value,
 100                                        Atomic32 new_value) {
 101   Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 102   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
 103     __asm__ __volatile__("lfence" : : : "memory");
 104   }
 105   return x;
 106 }
 107
 108 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
 109                                        Atomic32 old_value,
 110                                        Atomic32 new_value) {
 111   return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 112 }
 113
 114 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
 115   *ptr = value;
 116 }
 117
 118 #if defined(__x86_64__)
 119
 // The 64-bit implementation of the memory barrier can be simpler, because
 // "mfence" is guaranteed to exist there.
 inline void MemoryBarrier() {
   // The "memory" clobber additionally keeps the compiler from moving memory
   // accesses across the fence.
   __asm__ __volatile__("mfence"
                        :
                        :
                        : "memory");
 }
 125
 126 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
 127   *ptr = value;
 128   MemoryBarrier();
 129 }
 130
 131 #else
 132
 133 inline void MemoryBarrier() {
 134   if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
 135     __asm__ __volatile__("mfence" : : : "memory");
 136   } else {  // mfence is faster but not present on PIII
 137     Atomic32 x = 0;
 138     NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
 139   }
 140 }
 141
 142 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
 143   if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
 144     *ptr = value;
 145     __asm__ __volatile__("mfence" : : : "memory");
 146   } else {
 147     NoBarrier_AtomicExchange(ptr, value);
 148                           // acts as a barrier on PIII
 149   }
 150 }
 151 #endif
 152
 153 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
 154   ATOMICOPS_COMPILER_BARRIER();
 155   *ptr = value;  // An x86 store acts as a release barrier.
 156   // See comments in Atomic64 version of Release_Store(), below.
 157 }
 158
 159 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
 160   return *ptr;
 161 }
 162
 163 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
 164   Atomic32 value = *ptr;  // An x86 load acts as a acquire barrier.
 165   // See comments in Atomic64 version of Release_Store(), below.
 166   ATOMICOPS_COMPILER_BARRIER();
 167   return value;
 168 }
 169
 170 inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
 171   MemoryBarrier();
 172   return *ptr;
 173 }
 174
 175 #if defined(__x86_64__)
 176
 177 // 64-bit low-level operations on 64-bit platform.
 178
 179 inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
 180                                          Atomic64 old_value,
 181                                          Atomic64 new_value) {
 182   Atomic64 prev;
 183   __asm__ __volatile__("lock; cmpxchgq %1,%2"
 184                        : "=a" (prev)
 185                        : "q" (new_value), "m" (*ptr), "0" (old_value)
 186                        : "memory");
 187   return prev;
 188 }
 189
 190 inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
 191                                          Atomic64 new_value) {
 192   __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
 193                        : "=r" (new_value)
 194                        : "m" (*ptr), "0" (new_value)
 195                        : "memory");
 196   return new_value;  // Now it's the previous value.
 197 }
 198
 199 inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
 200                                           Atomic64 increment) {
 201   Atomic64 temp = increment;
 202   __asm__ __volatile__("lock; xaddq %0,%1"
 203                        : "+r" (temp), "+m" (*ptr)
 204                        : : "memory");
 205   // temp now contains the previous value of *ptr
 206   return temp + increment;
 207 }
 208
 209 inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
 210                                         Atomic64 increment) {
 211   Atomic64 temp = increment;
 212   __asm__ __volatile__("lock; xaddq %0,%1"
 213                        : "+r" (temp), "+m" (*ptr)
 214                        : : "memory");
 215   // temp now contains the previous value of *ptr
 216   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
 217     __asm__ __volatile__("lfence" : : : "memory");
 218   }
 219   return temp + increment;
 220 }
 221
 222 inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
 223   *ptr = value;
 224 }
 225
 226 inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
 227   *ptr = value;
 228   MemoryBarrier();
 229 }
 230
 231 inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
 232   ATOMICOPS_COMPILER_BARRIER();
 233
 234   *ptr = value;  // An x86 store acts as a release barrier
 235                  // for current AMD/Intel chips as of Jan 2008.
 236                  // See also Acquire_Load(), below.
 237
 238   // When new chips come out, check:
 239   //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
 240   //  System Programming Guide, Chatper 7: Multiple-processor management,
 241   //  Section 7.2, Memory Ordering.
 242   // Last seen at:
 243   //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
 244   //
 245   // x86 stores/loads fail to act as barriers for a few instructions (clflush
 246   // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
 247   // not generated by the compiler, and are rare.  Users of these instructions
 248   // need to know about cache behaviour in any case since all of these involve
 249   // either flushing cache lines or non-temporal cache hints.
 250 }
 251
 252 inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
 253   return *ptr;
 254 }
 255
 256 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
 257   Atomic64 value = *ptr;  // An x86 load acts as a acquire barrier,
 258                           // for current AMD/Intel chips as of Jan 2008.
 259                           // See also Release_Store(), above.
 260   ATOMICOPS_COMPILER_BARRIER();
 261   return value;
 262 }
 263
 264 inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
 265   MemoryBarrier();
 266   return *ptr;
 267 }
 268
 269 inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
 270                                        Atomic64 old_value,
 271                                        Atomic64 new_value) {
 272   Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 273   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
 274     __asm__ __volatile__("lfence" : : : "memory");
 275   }
 276   return x;
 277 }
 278
 279 inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
 280                                        Atomic64 old_value,
 281                                        Atomic64 new_value) {
 282   return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 283 }
 284
 285 #endif  // defined(__x86_64__)
 286
 287 }  // namespace internal
 288 }  // namespace protobuf
 289 }  // namespace google
 290
 291 #undef ATOMICOPS_COMPILER_BARRIER
 292
 293 #endif  // GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_